summaryrefslogtreecommitdiffstats
path: root/src/arrow/java
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/java
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/java')
-rw-r--r--src/arrow/java/README.md164
-rw-r--r--src/arrow/java/adapter/avro/pom.xml59
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java67
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfig.java86
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfigBuilder.java74
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java805
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowVectorIterator.java186
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroArraysConsumer.java74
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java43
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java49
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java42
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroEnumConsumer.java43
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFixedConsumer.java46
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java42
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java42
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java42
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroMapConsumer.java79
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroNullConsumer.java39
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java48
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStructConsumer.java76
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroUnionsConsumer.java86
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/BaseAvroConsumer.java65
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/CompositeAvroConsumer.java73
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java71
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipConsumer.java67
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipFunction.java30
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDateConsumer.java43
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDecimalConsumer.java88
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMicroConsumer.java43
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMillisConsumer.java43
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMicrosConsumer.java43
-rw-r--r--src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMillisConsumer.java43
-rw-r--r--src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroLogicalTypesTest.java201
-rw-r--r--src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroSkipFieldTest.java626
-rw-r--r--src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java228
-rw-r--r--src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowIteratorTest.java313
-rw-r--r--src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java477
-rw-r--r--src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java93
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc24
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc24
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc37
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc23
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc26
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc26
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc23
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc23
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc23
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc23
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc26
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc28
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc29
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc28
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc28
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc28
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc28
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc28
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc26
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc38
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc34
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc26
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc45
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc28
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_array.avsc23
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_fixed.avsc23
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_large_data.avsc75
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_map.avsc23
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc35
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc25
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc22
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc22
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc22
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc23
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc22
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc22
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc22
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc22
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_record.avsc27
-rw-r--r--src/arrow/java/adapter/avro/src/test/resources/schema/test_union.avsc25
-rw-r--r--src/arrow/java/adapter/jdbc/pom.xml96
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java186
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java31
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java114
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java103
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java296
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java201
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java341
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java134
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java54
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java87
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java135
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java87
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java72
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java161
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java76
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java116
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java88
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java87
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java87
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java87
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java46
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java38
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java87
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java112
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java89
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java97
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java87
-rw-r--r--src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java86
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java322
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java45
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java161
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java381
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java234
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java39
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java117
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java140
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java375
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java155
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java248
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java277
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java90
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java1350
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java168
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java467
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml121
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml51
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml83
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml43
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml53
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml43
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml43
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml48
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml48
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml48
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml49
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml48
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml48
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml48
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml26
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml36
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml48
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml48
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml48
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml46
-rw-r--r--src/arrow/java/adapter/jdbc/src/test/resources/logback.xml28
-rw-r--r--src/arrow/java/adapter/orc/CMakeLists.txt43
-rw-r--r--src/arrow/java/adapter/orc/pom.xml113
-rw-r--r--src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java45
-rw-r--r--src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java62
-rw-r--r--src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java77
-rw-r--r--src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java90
-rw-r--r--src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java79
-rw-r--r--src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java47
-rw-r--r--src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java121
-rw-r--r--src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java109
-rw-r--r--src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java45
-rw-r--r--src/arrow/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java104
-rw-r--r--src/arrow/java/algorithm/pom.xml55
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java96
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java108
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java72
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java39
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableBasedDictionaryBuilder.java153
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java146
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java112
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java100
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java146
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java119
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java89
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java190
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java108
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java88
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java71
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java431
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java169
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java80
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java37
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java180
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java74
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java72
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java37
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java66
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java93
-rw-r--r--src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java123
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java135
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java131
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java202
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java350
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java350
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java357
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java221
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java138
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java145
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java150
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java195
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java299
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java112
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java393
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java240
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java365
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java172
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java205
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java117
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java67
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java166
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java137
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java99
-rw-r--r--src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java165
-rw-r--r--src/arrow/java/api-changes.md32
-rw-r--r--src/arrow/java/c/CMakeLists.txt51
-rw-r--r--src/arrow/java/c/README.md54
-rw-r--r--src/arrow/java/c/pom.xml77
-rw-r--r--src/arrow/java/c/src/main/cpp/abi.h103
-rw-r--r--src/arrow/java/c/src/main/cpp/jni_wrapper.cc263
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java149
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java152
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrowArray.java185
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java171
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/BaseStruct.java43
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java72
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/CDataReferenceManager.java124
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/Data.java317
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/Flags.java51
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/Format.java340
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/Metadata.java102
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java139
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java132
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java110
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java95
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java47
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java31
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java143
-rw-r--r--src/arrow/java/c/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java110
-rw-r--r--src/arrow/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java219
-rw-r--r--src/arrow/java/c/src/test/java/org/apache/arrow/c/FlagsTest.java76
-rw-r--r--src/arrow/java/c/src/test/java/org/apache/arrow/c/FormatTest.java140
-rw-r--r--src/arrow/java/c/src/test/java/org/apache/arrow/c/MetadataTest.java104
-rw-r--r--src/arrow/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java82
-rw-r--r--src/arrow/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java795
-rw-r--r--src/arrow/java/compression/pom.xml52
-rw-r--r--src/arrow/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java43
-rw-r--r--src/arrow/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java89
-rw-r--r--src/arrow/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java74
-rw-r--r--src/arrow/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java213
-rw-r--r--src/arrow/java/dataset/CMakeLists.txt43
-rw-r--r--src/arrow/java/dataset/pom.xml134
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java36
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java38
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java47
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/DirectReservationListener.java97
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniLoader.java94
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java107
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeContext.java53
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java56
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDatasetFactory.java104
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeInstanceReleasedException.java31
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java76
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeRecordBatchHandle.java106
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanTask.java46
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanner.java170
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java36
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java72
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanTask.java42
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/Scanner.java41
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/source/Dataset.java35
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/source/DatasetFactory.java51
-rw-r--r--src/arrow/java/dataset/src/main/java/org/apache/arrow/memory/NativeUnderlyingMemory.java81
-rw-r--r--src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/ParquetWriteSupport.java123
-rw-r--r--src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/TestDataset.java97
-rw-r--r--src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java338
-rw-r--r--src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDatasetFactory.java48
-rw-r--r--src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestNativeDataset.java33
-rw-r--r--src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestReservationListener.java88
-rw-r--r--src/arrow/java/dataset/src/test/java/org/apache/arrow/memory/TestNativeUnderlyingMemory.java110
-rw-r--r--src/arrow/java/dataset/src/test/resources/avroschema/user.avsc26
-rw-r--r--src/arrow/java/dev/checkstyle/checkstyle.license16
-rw-r--r--src/arrow/java/dev/checkstyle/checkstyle.xml280
-rw-r--r--src/arrow/java/dev/checkstyle/suppressions.xml42
-rw-r--r--src/arrow/java/flight/flight-core/README.md95
-rw-r--r--src/arrow/java/flight/flight-core/pom.xml392
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Action.java61
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java70
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java560
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/AsyncPutListener.java72
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/BackpressureStrategy.java172
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallHeaders.java65
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallInfo.java33
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOption.java24
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOptions.java62
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java143
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Criteria.java58
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/DictionaryUtils.java127
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ErrorFlightMetadata.java81
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightBindingService.java174
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightCallHeaders.java111
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java721
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClientMiddleware.java52
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightConstants.java29
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightDescriptor.java180
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightEndpoint.java106
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java208
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightMethod.java64
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightProducer.java164
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightRuntimeException.java46
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java399
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServerMiddleware.java100
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightService.java427
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java82
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java505
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/HeaderCallOption.java52
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Location.java158
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/LocationSchemes.java32
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpFlightProducer.java61
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpStreamListener.java49
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java123
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListenerImpl.java132
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PutResult.java96
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RequestContext.java51
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Result.java50
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SchemaResult.java96
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerHeaderMiddleware.java65
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/StreamPipe.java118
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SyncPutListener.java122
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Ticket.java102
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java51
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicClientAuthHandler.java58
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicServerAuthHandler.java74
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthHandler.java55
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthInterceptor.java73
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthWrapper.java162
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java72
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthInterceptor.java85
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java144
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/Auth2Constants.java31
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/AuthUtilities.java47
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicAuthCredentialWriter.java44
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicCallHeaderAuthenticator.java88
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerCredentialWriter.java39
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerTokenAuthenticator.java62
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/CallHeaderAuthenticator.java86
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientBearerHeaderHandler.java36
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java100
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHeaderHandler.java43
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientIncomingAuthHeaderMiddleware.java78
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/GeneratedBearerTokenAuthenticator.java128
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ServerCallHeaderAuthMiddleware.java74
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/client/ClientCookieMiddleware.java130
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/ExampleFlightServer.java93
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/ExampleTicket.java141
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/FlightHolder.java131
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/InMemoryStore.java176
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/Stream.java177
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/AuthBasicProtoScenario.java97
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationAssertions.java74
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestClient.java197
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestServer.java97
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/MiddlewareScenario.java168
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/Scenario.java45
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/Scenarios.java90
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java128
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CallCredentialAdapter.java53
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ClientInterceptorAdapter.java149
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ContextPropagatingExecutorService.java117
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CredentialCallOption.java41
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java99
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/MetadataAdapter.java98
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/RequestContextAdapter.java57
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerInterceptorAdapter.java145
-rw-r--r--src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java255
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/FlightTestUtil.java150
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestApplicationMetadata.java329
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestAuth.java93
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBackPressure.java262
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBasicOperation.java567
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestCallOptions.java191
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java359
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDictionaryUtils.java91
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDoExchange.java536
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java143
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightClient.java225
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java125
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLargeMessage.java165
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLeak.java182
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestMetadataVersion.java319
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerMiddleware.java360
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerOptions.java176
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestTls.java145
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java158
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java232
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/TestCookieHandling.java267
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/example/TestExampleServer.java117
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java51
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java216
-rw-r--r--src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java199
-rw-r--r--src/arrow/java/flight/flight-core/src/test/protobuf/perf.proto45
-rw-r--r--src/arrow/java/flight/flight-core/src/test/resources/logback.xml28
-rw-r--r--src/arrow/java/flight/flight-grpc/pom.xml132
-rw-r--r--src/arrow/java/flight/flight-grpc/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java161
-rw-r--r--src/arrow/java/flight/flight-grpc/src/test/java/org/apache/arrow/flight/TestFlightGrpcUtils.java193
-rw-r--r--src/arrow/java/flight/flight-grpc/src/test/protobuf/test.proto26
-rw-r--r--src/arrow/java/format/pom.xml46
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java51
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java61
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java72
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java43
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java48
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java63
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java30
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java65
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java30
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java81
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java79
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java88
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java36
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java57
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java34
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java62
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java120
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java68
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java60
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java60
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java57
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java100
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java61
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java57
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java31
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java70
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java52
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java52
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java52
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/List.java48
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java87
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java81
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java44
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java54
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java51
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java31
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java103
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java102
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java30
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java114
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java92
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java32
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java118
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java173
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java53
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java91
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java74
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java66
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java32
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java116
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java55
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java74
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java30
-rw-r--r--src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java51
-rw-r--r--src/arrow/java/gandiva/CMakeLists.txt55
-rw-r--r--src/arrow/java/gandiva/README.md32
-rw-r--r--src/arrow/java/gandiva/pom.xml153
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java72
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java94
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java220
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java29
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java199
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java93
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java170
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java120
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java364
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVector.java87
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt16.java49
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt32.java48
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java69
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/EvaluatorClosedException.java25
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java35
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/UnsupportedTypeException.java27
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/AndNode.java47
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java350
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BinaryNode.java45
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BooleanNode.java43
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/Condition.java42
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java49
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DoubleNode.java43
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ExpressionTree.java46
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FieldNode.java43
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FloatNode.java43
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java54
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IfNode.java52
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java176
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IntNode.java43
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/LongNode.java43
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/NullNode.java41
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/OrNode.java47
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/StringNode.java48
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java230
-rw-r--r--src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeNode.java34
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java404
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java89
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java65
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java102
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterTest.java315
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java151
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java797
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java2470
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/TestJniLoader.java53
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/ArrowTypeHelperTest.java105
-rw-r--r--src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/TreeBuilderTest.java350
-rw-r--r--src/arrow/java/gandiva/src/test/resources/logback.xml28
-rw-r--r--src/arrow/java/memory/memory-core/pom.xml40
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java308
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java85
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationManager.java221
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcome.java97
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java132
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java88
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocatorClosedException.java35
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java1202
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java951
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java63
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java238
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java525
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java53
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java87
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java44
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerOption.java133
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java336
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OutOfMemoryException.java67
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferNOOP.java39
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferResult.java28
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/README.md121
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java175
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/RootAllocator.java71
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java28
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/package-info.java26
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java114
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/RoundingPolicy.java26
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/SegmentRoundingPolicy.java60
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java187
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AssertionUtil.java40
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AutoCloseableLock.java43
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java347
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java79
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java178
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java44
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java170
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/StackTrace.java70
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/ArrowBufHasher.java47
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java175
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java116
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java242
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java92
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java1323
-rw-r--r--src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/VisibleForTesting.java26
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java63
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAccountant.java172
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAllocationManager.java39
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java149
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java150
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java169
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java216
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java167
-rwxr-xr-xsrc/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestLargeMemoryUtil.java105
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java123
-rw-r--r--src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java83
-rw-r--r--src/arrow/java/memory/memory-netty/pom.xml72
-rw-r--r--src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/ExpandableByteBuf.java56
-rw-r--r--src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/LargeBuffer.java34
-rw-r--r--src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java448
-rw-r--r--src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/NettyArrowBuf.java622
-rw-r--r--src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java280
-rw-r--r--src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java270
-rw-r--r--src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java161
-rw-r--r--src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java38
-rw-r--r--src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/NettyAllocationManager.java123
-rw-r--r--src/arrow/java/memory/memory-netty/src/test/java/io/netty/buffer/TestNettyArrowBuf.java141
-rw-r--r--src/arrow/java/memory/memory-netty/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java77
-rw-r--r--src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/ITTestLargeArrowBuf.java72
-rw-r--r--src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestAllocationManagerNetty.java39
-rw-r--r--src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java1183
-rw-r--r--src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestEmptyArrowBuf.java88
-rw-r--r--src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestEndianness.java51
-rw-r--r--src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestNettyAllocationManager.java108
-rw-r--r--src/arrow/java/memory/memory-netty/src/test/resources/logback.xml28
-rw-r--r--src/arrow/java/memory/memory-unsafe/pom.xml33
-rw-r--r--src/arrow/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java37
-rw-r--r--src/arrow/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/UnsafeAllocationManager.java70
-rw-r--r--src/arrow/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/TestAllocationManagerUnsafe.java41
-rw-r--r--src/arrow/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/TestUnsafeAllocationManager.java68
-rw-r--r--src/arrow/java/memory/pom.xml29
-rw-r--r--src/arrow/java/memory/src/test/java/io/netty/buffer/TestExpandableByteBuf.java117
-rw-r--r--src/arrow/java/performance/pom.xml233
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java141
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java359
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java115
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/memory/AllocatorBenchmarks.java95
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/memory/ArrowBufBenchmarks.java82
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java107
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java138
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java95
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java229
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java121
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java122
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java134
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java110
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java102
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java130
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java117
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java109
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java147
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java87
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java98
-rw-r--r--src/arrow/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java123
-rw-r--r--src/arrow/java/plasma/README.md39
-rw-r--r--src/arrow/java/plasma/pom.xml34
-rw-r--r--src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/ObjectStoreLink.java131
-rw-r--r--src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClient.java184
-rw-r--r--src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClientJNI.java57
-rw-r--r--src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/DuplicateObjectException.java32
-rw-r--r--src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaClientException.java32
-rw-r--r--src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaOutOfMemoryException.java40
-rw-r--r--src/arrow/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java313
-rw-r--r--src/arrow/java/plasma/src/test/resources/logback.xml28
-rwxr-xr-xsrc/arrow/java/plasma/test.sh56
-rw-r--r--src/arrow/java/pom.xml839
-rw-r--r--src/arrow/java/tools/pom.xml106
-rw-r--r--src/arrow/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java146
-rw-r--r--src/arrow/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java123
-rw-r--r--src/arrow/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java78
-rw-r--r--src/arrow/java/tools/src/main/java/org/apache/arrow/tools/Integration.java244
-rw-r--r--src/arrow/java/tools/src/main/java/org/apache/arrow/tools/StreamToFile.java76
-rw-r--r--src/arrow/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java105
-rw-r--r--src/arrow/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java301
-rw-r--r--src/arrow/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java65
-rw-r--r--src/arrow/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java288
-rw-r--r--src/arrow/java/tools/src/test/resources/logback.xml27
-rw-r--r--src/arrow/java/vector/pom.xml274
-rw-r--r--src/arrow/java/vector/src/main/codegen/config.fmpp24
-rw-r--r--src/arrow/java/vector/src/main/codegen/data/ArrowTypes.tdd124
-rw-r--r--src/arrow/java/vector/src/main/codegen/data/ValueVectorTypes.tdd206
-rw-r--r--src/arrow/java/vector/src/main/codegen/includes/license.ftl16
-rw-r--r--src/arrow/java/vector/src/main/codegen/includes/vv_imports.ftl61
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/AbstractFieldReader.java132
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/AbstractFieldWriter.java230
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java238
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ArrowType.java375
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/BaseReader.java85
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/BaseWriter.java131
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java53
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ComplexCopier.java191
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ComplexReaders.java147
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ComplexWriters.java211
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/DenseUnionReader.java229
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/DenseUnionVector.java943
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/DenseUnionWriter.java302
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/HolderReaderImpl.java173
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/NullReader.java147
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/StructWriters.java326
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java319
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionListWriter.java326
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionMapWriter.java222
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionReader.java223
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionVector.java854
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionWriter.java364
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ValueHolders.java81
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java46
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java95
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java930
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java43
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java1370
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java231
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java1410
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java358
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java599
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java449
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java31
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java153
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java347
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java350
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java584
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java584
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java57
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java406
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java42
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java274
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java93
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java386
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java36
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java361
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java362
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java46
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java337
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java362
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java433
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java442
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java382
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java305
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java331
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java84
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java338
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java64
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java57
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java389
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java347
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java351
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java347
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java348
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java239
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java236
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java238
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java236
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java241
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java236
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java238
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java237
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java197
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java390
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java448
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java368
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java346
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java340
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java336
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java285
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java306
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java331
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java53
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java26
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java137
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java429
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java107
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java138
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java147
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java85
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java563
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java154
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java60
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java44
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java61
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java149
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java140
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java425
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java36
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java367
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java51
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java675
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java1036
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java879
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java122
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java440
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java29
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java32
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java32
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java49
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java40
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java44
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java608
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java34
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java118
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java55
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java227
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java59
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java48
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java398
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java91
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java113
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java137
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java105
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java109
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java107
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java77
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java35
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java33
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java116
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java62
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java103
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java67
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java75
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java196
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java295
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java62
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java137
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java196
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java28
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java38
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java26
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java26
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java37
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java31
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java230
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java119
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java44
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java255
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java229
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java86
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java210
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java30
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java806
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java417
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java102
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java43
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java162
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java95
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java55
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java90
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java94
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java64
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java226
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java42
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java259
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java30
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java67
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java44
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java91
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java115
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java61
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java736
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java50
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java55
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java52
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java65
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java50
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java1016
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java57
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java88
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java42
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java306
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java123
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java247
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java86
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java25
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java99
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java134
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java188
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java145
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java86
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java55
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java48
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java67
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java248
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java230
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java52
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java133
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java48
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java63
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java688
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java33
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java190
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java187
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java542
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java39
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java83
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java82
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java61
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java246
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java180
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java378
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java273
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/util/TestSchemaUtil.java51
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java52
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java280
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java543
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java235
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java131
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java1104
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java357
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java365
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java639
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java1032
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java137
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java279
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java507
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java99
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java58
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java982
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java104
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java816
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java981
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java1113
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java92
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java73
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java132
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java46
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java410
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java183
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java98
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java520
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java45
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java3061
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java77
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java169
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java474
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java168
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java318
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java332
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java740
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java185
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java763
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java167
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java1335
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java849
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java187
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java247
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java134
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java68
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java882
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java147
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java161
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java458
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java628
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java246
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java36
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java169
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java44
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java604
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java708
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java420
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java63
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java254
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java127
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java76
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java134
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java60
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java56
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java794
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java72
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java161
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java260
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java234
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java101
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java301
-rw-r--r--src/arrow/java/vector/src/test/resources/logback.xml28
966 files changed, 159504 insertions, 0 deletions
diff --git a/src/arrow/java/README.md b/src/arrow/java/README.md
new file mode 100644
index 000000000..29d1fcf4c
--- /dev/null
+++ b/src/arrow/java/README.md
@@ -0,0 +1,164 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Arrow Java
+
+## Getting Started
+
+The following guides explain the fundamental data structures used in the Java implementation of Apache Arrow.
+
+- [ValueVector](https://arrow.apache.org/docs/java/vector.html) is an abstraction that is used to store a sequence of values having the same type in an individual column.
+- [VectorSchemaRoot](https://arrow.apache.org/docs/java/vector_schema_root.html) is a container that can hold multiple vectors based on a schema.
+- The [Reading/Writing IPC formats](https://arrow.apache.org/docs/java/ipc.html) guide explains how to stream record batches as well as serializing record batches to files.
+
+Generated javadoc documentation is available [here](https://arrow.apache.org/docs/java/).
+
+## Setup Build Environment
+
+install:
+ - Java 8 or later
+ - Maven 3.3 or later
+
+## Building and running tests
+
+```
+git submodule update --init --recursive # Needed for flight
+cd java
+mvn install
+```
+## Building and running tests for arrow jni modules like gandiva and orc (optional)
+
+[Arrow Cpp][2] must be built before this step. The cpp build directory must
+be provided as the value for argument arrow.cpp.build.dir. eg.
+
+```
+cd java
+mvn install -P arrow-jni -am -Darrow.cpp.build.dir=../../release
+```
+
+The gandiva library is still in Alpha stages, and subject to API changes without
+deprecation warnings.
+
+## Flatbuffers dependency
+
+Arrow uses Google's Flatbuffers to transport metadata. The java version of the library
+requires the generated flatbuffer classes can only be used with the same version that
+generated them. Arrow packages a version of the arrow-vector module that shades flatbuffers
+and arrow-format into a single JAR. Using the classifier "shade-format-flatbuffers" in your
+pom.xml will make use of this JAR, you can then exclude/resolve the original dependency to
+a version of your choosing.
+
+### Updating the flatbuffers generated code
+
+1. Verify that your version of flatc matches the declared dependency:
+
+```bash
+$ flatc --version
+flatc version 1.12.0
+
+$ grep "dep.fbs.version" java/pom.xml
+ <dep.fbs.version>1.12.0</dep.fbs.version>
+```
+
+2. Generate the flatbuffer java files by performing the following:
+
+```bash
+cd $ARROW_HOME
+
+# remove the existing files
+rm -rf java/format/src
+
+# regenerate from the .fbs files
+flatc --java -o java/format/src/main/java format/*.fbs
+
+# prepend license header
+find java/format/src -type f | while read file; do
+ (cat header | while read line; do echo "// $line"; done; cat $file) > $file.tmp
+ mv $file.tmp $file
+done
+```
+
+## Performance Tuning
+
+There are several system/environmental variables that users can configure. These trade off safety (they turn off checking) for speed. Typically they are only used in production settings after the code has been thoroughly tested without using them.
+
+* Bounds Checking for memory accesses: Bounds checking is on by default. You can disable it by setting either the
+system property("arrow.enable_unsafe_memory_access") or the environmental variable
+("ARROW_ENABLE_UNSAFE_MEMORY_ACCESS") to "true". When both the system property and the environmental
+variable are set, the system property takes precedence.
+
+* null checking for gets: ValueVector get methods (not getObject) methods by default verify the slot is not null. You can disable it by setting either the
+system property("arrow.enable_null_check_for_get") or the environmental variable
+("ARROW_ENABLE_NULL_CHECK_FOR_GET") to "false". When both the system property and the environmental
+variable are set, the system property takes precedence.
+
+## Java Properties
+
+ * For java 9 or later, should set "-Dio.netty.tryReflectionSetAccessible=true".
+This fixes `java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.(long, int) not available`. thrown by netty.
+ * To support duplicate fields in a `StructVector` enable "-Darrow.struct.conflict.policy=CONFLICT_APPEND".
+Duplicate fields are ignored (`CONFLICT_REPLACE`) by default and overwritten. To support different policies for
+conflicting or duplicate fields set this JVM flag or use the correct static constructor methods for `StructVector`s.
+
+## Java Code Style Guide
+
+Arrow Java follows the Google style guide [here][3] with the following
+differences:
+
+* Imports are grouped, from top to bottom, in this order: static imports,
+standard Java, org.\*, com.\*
+* Line length can be up to 120 characters
+* Operators for line wrapping are at end-of-line
+* Naming rules for methods, parameters, etc. have been relaxed
+* Disabled `NoFinalizer`, `OverloadMethodsDeclarationOrder`, and
+`VariableDeclarationUsageDistance` due to the existing code base. These rules
+should be followed when possible.
+
+Refer to `java/dev/checkstyle/checkstyle.xml for rule specifics.
+
+## Test Logging Configuration
+
+When running tests, Arrow Java uses the Logback logger with SLF4J. By default,
+it uses the logback.xml present in the corresponding module's src/test/resources
+directory, which has the default log level set to INFO.
+Arrow Java can be built with an alternate logback configuration file using the
+following command run in the project root directory:
+
+```bash
+mvn -Dlogback.configurationFile=file:<path-of-logback-file>
+```
+
+See [Logback Configuration][1] for more details.
+
+## Integration Tests
+
+Integration tests which require more time or more memory can be run by activating
+the `integration-tests` profile. This activates the [maven failsafe][4] plugin
+and any class prefixed with `IT` will be run during the testing phase. The integration
+tests currently require a larger amount of memory (>4GB) and time to complete. To activate
+the profile:
+
+```bash
+mvn -Pintegration-tests <rest of mvn arguments>
+```
+
+[1]: https://logback.qos.ch/manual/configuration.html
+[2]: https://github.com/apache/arrow/blob/master/cpp/README.md
+[3]: http://google.github.io/styleguide/javaguide.html
+[4]: https://maven.apache.org/surefire/maven-failsafe-plugin/
diff --git a/src/arrow/java/adapter/avro/pom.xml b/src/arrow/java/adapter/avro/pom.xml
new file mode 100644
index 000000000..1f3fea849
--- /dev/null
+++ b/src/arrow/java/adapter/avro/pom.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ <relativePath>../../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>arrow-avro</artifactId>
+ <name>Arrow AVRO Adapter</name>
+ <description>(Contrib/Experimental) A library for converting Avro data to Arrow data.</description>
+ <url>http://maven.apache.org</url>
+
+ <dependencies>
+
+ <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory-core -->
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory-netty -->
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+
+ <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-vector -->
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ <version>${dep.avro.version}</version>
+ </dependency>
+ </dependencies>
+
+</project>
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java
new file mode 100644
index 000000000..9fb5ce291
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import java.io.IOException;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.avro.Schema;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Utility class to convert Avro objects to columnar Arrow format objects.
+ */
+public class AvroToArrow {
+
+ /**
+ * Fetch the data from {@link Decoder} and convert it to Arrow objects.
+ * Only for testing purpose.
+ * @param schema avro schema.
+ * @param decoder avro decoder
+ * @param config configuration of the conversion.
+ * @return Arrow Data Objects {@link VectorSchemaRoot}
+ */
+ static VectorSchemaRoot avroToArrow(Schema schema, Decoder decoder, AvroToArrowConfig config)
+ throws IOException {
+ Preconditions.checkNotNull(schema, "Avro schema object can not be null");
+ Preconditions.checkNotNull(decoder, "Avro decoder object can not be null");
+ Preconditions.checkNotNull(config, "config can not be null");
+
+ return AvroToArrowUtils.avroToArrowVectors(schema, decoder, config);
+ }
+
+ /**
+ * Fetch the data from {@link Decoder} and iteratively convert it to Arrow objects.
+ * @param schema avro schema
+ * @param decoder avro decoder
+ * @param config configuration of the conversion.
+ * @throws IOException on error
+ */
+ public static AvroToArrowVectorIterator avroToArrowIterator(
+ Schema schema,
+ Decoder decoder,
+ AvroToArrowConfig config) throws IOException {
+
+ Preconditions.checkNotNull(schema, "Avro schema object can not be null");
+ Preconditions.checkNotNull(decoder, "Avro decoder object can not be null");
+ Preconditions.checkNotNull(config, "config can not be null");
+
+ return AvroToArrowVectorIterator.create(decoder, schema, config);
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfig.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfig.java
new file mode 100644
index 000000000..4f59ef384
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfig.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import java.util.Set;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+
+/**
+ * This class configures the Avro-to-Arrow conversion process.
+ */
+public class AvroToArrowConfig {
+
+ private final BufferAllocator allocator;
+ /**
+ * The maximum rowCount to read each time when partially convert data.
+ * Default value is 1024 and -1 means read all data into one vector.
+ */
+ private final int targetBatchSize;
+
+ /**
+ * The dictionary provider used for enum type.
+ * If avro schema has enum type, will create dictionary and update this provider.
+ */
+ private final DictionaryProvider.MapDictionaryProvider provider;
+
+ /**
+ * The field names which to skip when reading decoder values.
+ */
+ private final Set<String> skipFieldNames;
+
+ /**
+ * Instantiate an instance.
+ * @param allocator The memory allocator to construct the Arrow vectors with.
+ * @param targetBatchSize The maximum rowCount to read each time when partially convert data.
+ * @param provider The dictionary provider used for enum type, adapter will update this provider.
+ * @param skipFieldNames Field names which to skip.
+ */
+ AvroToArrowConfig(
+ BufferAllocator allocator,
+ int targetBatchSize,
+ DictionaryProvider.MapDictionaryProvider provider,
+ Set<String> skipFieldNames) {
+
+ Preconditions.checkArgument(targetBatchSize == AvroToArrowVectorIterator.NO_LIMIT_BATCH_SIZE ||
+ targetBatchSize > 0, "invalid targetBatchSize: %s", targetBatchSize);
+
+ this.allocator = allocator;
+ this.targetBatchSize = targetBatchSize;
+ this.provider = provider;
+ this.skipFieldNames = skipFieldNames;
+ }
+
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ public int getTargetBatchSize() {
+ return targetBatchSize;
+ }
+
+ public DictionaryProvider.MapDictionaryProvider getProvider() {
+ return provider;
+ }
+
+ public Set<String> getSkipFieldNames() {
+ return skipFieldNames;
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfigBuilder.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfigBuilder.java
new file mode 100644
index 000000000..474c1eb5c
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfigBuilder.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+
+/**
+ * This class builds {@link AvroToArrowConfig}s.
+ */
+public class AvroToArrowConfigBuilder {
+
+ private BufferAllocator allocator;
+
+ private int targetBatchSize;
+
+ private DictionaryProvider.MapDictionaryProvider provider;
+
+ private Set<String> skipFieldNames;
+
+ /**
+ * Default constructor for the {@link AvroToArrowConfigBuilder}.
+ */
+ public AvroToArrowConfigBuilder(BufferAllocator allocator) {
+ this.allocator = allocator;
+ this.targetBatchSize = AvroToArrowVectorIterator.DEFAULT_BATCH_SIZE;
+ this.provider = new DictionaryProvider.MapDictionaryProvider();
+ this.skipFieldNames = new HashSet<>();
+ }
+
+ public AvroToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) {
+ this.targetBatchSize = targetBatchSize;
+ return this;
+ }
+
+ public AvroToArrowConfigBuilder setProvider(DictionaryProvider.MapDictionaryProvider provider) {
+ this.provider = provider;
+ return this;
+ }
+
+ public AvroToArrowConfigBuilder setSkipFieldNames(Set<String> skipFieldNames) {
+ this.skipFieldNames = skipFieldNames;
+ return this;
+ }
+
+ /**
+ * This builds the {@link AvroToArrowConfig} from the provided params.
+ */
+ public AvroToArrowConfig build() {
+ return new AvroToArrowConfig(
+ allocator,
+ targetBatchSize,
+ provider,
+ skipFieldNames);
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java
new file mode 100644
index 000000000..80293c8b8
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java
@@ -0,0 +1,805 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
+import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.consumers.AvroArraysConsumer;
+import org.apache.arrow.consumers.AvroBooleanConsumer;
+import org.apache.arrow.consumers.AvroBytesConsumer;
+import org.apache.arrow.consumers.AvroDoubleConsumer;
+import org.apache.arrow.consumers.AvroEnumConsumer;
+import org.apache.arrow.consumers.AvroFixedConsumer;
+import org.apache.arrow.consumers.AvroFloatConsumer;
+import org.apache.arrow.consumers.AvroIntConsumer;
+import org.apache.arrow.consumers.AvroLongConsumer;
+import org.apache.arrow.consumers.AvroMapConsumer;
+import org.apache.arrow.consumers.AvroNullConsumer;
+import org.apache.arrow.consumers.AvroStringConsumer;
+import org.apache.arrow.consumers.AvroStructConsumer;
+import org.apache.arrow.consumers.AvroUnionsConsumer;
+import org.apache.arrow.consumers.CompositeAvroConsumer;
+import org.apache.arrow.consumers.Consumer;
+import org.apache.arrow.consumers.SkipConsumer;
+import org.apache.arrow.consumers.SkipFunction;
+import org.apache.arrow.consumers.logical.AvroDateConsumer;
+import org.apache.arrow.consumers.logical.AvroDecimalConsumer;
+import org.apache.arrow.consumers.logical.AvroTimeMicroConsumer;
+import org.apache.arrow.consumers.logical.AvroTimeMillisConsumer;
+import org.apache.arrow.consumers.logical.AvroTimestampMicrosConsumer;
+import org.apache.arrow.consumers.logical.AvroTimestampMillisConsumer;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+import org.apache.avro.LogicalType;
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Class that does most of the work to convert Avro data into Arrow columnar format Vector objects.
+ */
+public class AvroToArrowUtils {
+
+ /**
+ * Creates a {@link Consumer} from the {@link Schema}
+ *
+ <p>This method currently performs following type mapping for Avro data types to corresponding Arrow data types.
+ *
+ * <ul>
+ * <li>STRING --> ArrowType.Utf8</li>
+ * <li>INT --> ArrowType.Int(32, signed)</li>
+ * <li>LONG --> ArrowType.Int(64, signed)</li>
+ * <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
+ * <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li>
+ * <li>BOOLEAN --> ArrowType.Bool</li>
+ * <li>BYTES --> ArrowType.Binary</li>
+ * <li>ARRAY --> ArrowType.List</li>
+ * <li>MAP --> ArrowType.Map</li>
+ * <li>FIXED --> ArrowType.FixedSizeBinary</li>
+ * <li>RECORD --> ArrowType.Struct</li>
+ * <li>UNION --> ArrowType.Union</li>
+ * <li>ENUM--> ArrowType.Int</li>
+ * <li>DECIMAL --> ArrowType.Decimal</li>
+ * <li>Date --> ArrowType.Date(DateUnit.DAY)</li>
+ * <li>TimeMillis --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li>
+ * <li>TimeMicros --> ArrowType.Time(TimeUnit.MICROSECOND, 64)</li>
+ * <li>TimestampMillis --> ArrowType.Timestamp(TimeUnit.MILLISECOND, null)</li>
+ * <li>TimestampMicros --> ArrowType.Timestamp(TimeUnit.MICROSECOND, null)</li>
+ * </ul>
+ */
+
+ private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config) {
+ return createConsumer(schema, name, false, config, null);
+ }
+
+ private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config, FieldVector vector) {
+ return createConsumer(schema, name, false, config, vector);
+ }
+
+ /**
+ * Create a consumer with the given Avro schema.
+ *
+ * @param schema avro schema
+ * @param name arrow field name
+ * @param consumerVector vector to keep in consumer, if v == null, will create a new vector via field.
+ * @return consumer
+ */
+ private static Consumer createConsumer(
+ Schema schema,
+ String name,
+ boolean nullable,
+ AvroToArrowConfig config,
+ FieldVector consumerVector) {
+
+ Preconditions.checkNotNull(schema, "Avro schema object can't be null");
+ Preconditions.checkNotNull(config, "Config can't be null");
+
+ final BufferAllocator allocator = config.getAllocator();
+
+ final Type type = schema.getType();
+ final LogicalType logicalType = schema.getLogicalType();
+
+ final ArrowType arrowType;
+ final FieldType fieldType;
+ final FieldVector vector;
+ final Consumer consumer;
+
+ switch (type) {
+ case UNION:
+ consumer = createUnionConsumer(schema, name, config, consumerVector);
+ break;
+ case ARRAY:
+ consumer = createArrayConsumer(schema, name, config, consumerVector);
+ break;
+ case MAP:
+ consumer = createMapConsumer(schema, name, config, consumerVector);
+ break;
+ case RECORD:
+ consumer = createStructConsumer(schema, name, config, consumerVector);
+ break;
+ case ENUM:
+ consumer = createEnumConsumer(schema, name, config, consumerVector);
+ break;
+ case STRING:
+ arrowType = new ArrowType.Utf8();
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroStringConsumer((VarCharVector) vector);
+ break;
+ case FIXED:
+ Map<String, String> extProps = createExternalProps(schema);
+ if (logicalType instanceof LogicalTypes.Decimal) {
+ arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema, extProps));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroDecimalConsumer.FixedDecimalConsumer((DecimalVector) vector, schema.getFixedSize());
+ } else {
+ arrowType = new ArrowType.FixedSizeBinary(schema.getFixedSize());
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema, extProps));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroFixedConsumer((FixedSizeBinaryVector) vector, schema.getFixedSize());
+ }
+ break;
+ case INT:
+ if (logicalType instanceof LogicalTypes.Date) {
+ arrowType = new ArrowType.Date(DateUnit.DAY);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroDateConsumer((DateDayVector) vector);
+ } else if (logicalType instanceof LogicalTypes.TimeMillis) {
+ arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroTimeMillisConsumer((TimeMilliVector) vector);
+ } else {
+ arrowType = new ArrowType.Int(32, /*signed=*/true);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroIntConsumer((IntVector) vector);
+ }
+ break;
+ case BOOLEAN:
+ arrowType = new ArrowType.Bool();
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroBooleanConsumer((BitVector) vector);
+ break;
+ case LONG:
+ if (logicalType instanceof LogicalTypes.TimeMicros) {
+ arrowType = new ArrowType.Time(TimeUnit.MICROSECOND, 64);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroTimeMicroConsumer((TimeMicroVector) vector);
+ } else if (logicalType instanceof LogicalTypes.TimestampMillis) {
+ arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroTimestampMillisConsumer((TimeStampMilliVector) vector);
+ } else if (logicalType instanceof LogicalTypes.TimestampMicros) {
+ arrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroTimestampMicrosConsumer((TimeStampMicroVector) vector);
+ } else {
+ arrowType = new ArrowType.Int(64, /*signed=*/true);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroLongConsumer((BigIntVector) vector);
+ }
+ break;
+ case FLOAT:
+ arrowType = new ArrowType.FloatingPoint(SINGLE);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroFloatConsumer((Float4Vector) vector);
+ break;
+ case DOUBLE:
+ arrowType = new ArrowType.FloatingPoint(DOUBLE);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroDoubleConsumer((Float8Vector) vector);
+ break;
+ case BYTES:
+ if (logicalType instanceof LogicalTypes.Decimal) {
+ arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType);
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroDecimalConsumer.BytesDecimalConsumer((DecimalVector) vector);
+ } else {
+ arrowType = new ArrowType.Binary();
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = createVector(consumerVector, fieldType, name, allocator);
+ consumer = new AvroBytesConsumer((VarBinaryVector) vector);
+ }
+ break;
+ case NULL:
+ arrowType = new ArrowType.Null();
+ fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema));
+ vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallback=*/null);
+ consumer = new AvroNullConsumer((NullVector) vector);
+ break;
+ default:
+ // no-op, shouldn't get here
+ throw new UnsupportedOperationException("Can't convert avro type %s to arrow type." + type.getName());
+ }
+ return consumer;
+ }
+
+ private static ArrowType createDecimalArrowType(LogicalTypes.Decimal logicalType) {
+ final int scale = logicalType.getScale();
+ final int precision = logicalType.getPrecision();
+ Preconditions.checkArgument(precision > 0 && precision <= 38,
+ "Precision must be in range of 1 to 38");
+ Preconditions.checkArgument(scale >= 0 && scale <= 38,
+ "Scale must be in range of 0 to 38.");
+ Preconditions.checkArgument(scale <= precision,
+ "Invalid decimal scale: %s (greater than precision: %s)", scale, precision);
+
+ return new ArrowType.Decimal(precision, scale, 128);
+
+ }
+
+ private static Consumer createSkipConsumer(Schema schema) {
+
+ SkipFunction skipFunction;
+ Type type = schema.getType();
+
+ switch (type) {
+ case UNION:
+ List<Consumer> unionDelegates = schema.getTypes().stream().map(s ->
+ createSkipConsumer(s)).collect(Collectors.toList());
+ skipFunction = decoder -> unionDelegates.get(decoder.readInt()).consume(decoder);
+
+ break;
+ case ARRAY:
+ Consumer elementDelegate = createSkipConsumer(schema.getElementType());
+ skipFunction = decoder -> {
+ for (long i = decoder.skipArray(); i != 0; i = decoder.skipArray()) {
+ for (long j = 0; j < i; j++) {
+ elementDelegate.consume(decoder);
+ }
+ }
+ };
+ break;
+ case MAP:
+ Consumer valueDelegate = createSkipConsumer(schema.getValueType());
+ skipFunction = decoder -> {
+ for (long i = decoder.skipMap(); i != 0; i = decoder.skipMap()) {
+ for (long j = 0; j < i; j++) {
+ decoder.skipString(); // Discard key
+ valueDelegate.consume(decoder);
+ }
+ }
+ };
+ break;
+ case RECORD:
+ List<Consumer> delegates = schema.getFields().stream().map(field ->
+ createSkipConsumer(field.schema())).collect(Collectors.toList());
+
+ skipFunction = decoder -> {
+ for (Consumer consumer : delegates) {
+ consumer.consume(decoder);
+ }
+ };
+
+ break;
+ case ENUM:
+ skipFunction = decoder -> decoder.readEnum();
+ break;
+ case STRING:
+ skipFunction = decoder -> decoder.skipString();
+ break;
+ case FIXED:
+ skipFunction = decoder -> decoder.skipFixed(schema.getFixedSize());
+ break;
+ case INT:
+ skipFunction = decoder -> decoder.readInt();
+ break;
+ case BOOLEAN:
+ skipFunction = decoder -> decoder.skipFixed(1);
+ break;
+ case LONG:
+ skipFunction = decoder -> decoder.readLong();
+ break;
+ case FLOAT:
+ skipFunction = decoder -> decoder.readFloat();
+ break;
+ case DOUBLE:
+ skipFunction = decoder -> decoder.readDouble();
+ break;
+ case BYTES:
+ skipFunction = decoder -> decoder.skipBytes();
+ break;
+ case NULL:
+ skipFunction = decoder -> { };
+ break;
+ default:
+ // no-op, shouldn't get here
+ throw new UnsupportedOperationException("Invalid avro type: " + type.getName());
+ }
+
+ return new SkipConsumer(skipFunction);
+ }
+
+ static CompositeAvroConsumer createCompositeConsumer(
+ Schema schema, AvroToArrowConfig config) {
+
+ List<Consumer> consumers = new ArrayList<>();
+ final Set<String> skipFieldNames = config.getSkipFieldNames();
+
+ Schema.Type type = schema.getType();
+ if (type == Type.RECORD) {
+ for (Schema.Field field : schema.getFields()) {
+ if (skipFieldNames.contains(field.name())) {
+ consumers.add(createSkipConsumer(field.schema()));
+ } else {
+ Consumer consumer = createConsumer(field.schema(), field.name(), config);
+ consumers.add(consumer);
+ }
+
+ }
+ } else {
+ Consumer consumer = createConsumer(schema, "", config);
+ consumers.add(consumer);
+ }
+
+ return new CompositeAvroConsumer(consumers);
+ }
+
+ private static FieldVector createVector(FieldVector consumerVector, FieldType fieldType,
+ String name, BufferAllocator allocator) {
+ return consumerVector != null ? consumerVector : fieldType.createNewSingleVector(name, allocator, null);
+ }
+
+ private static String getDefaultFieldName(ArrowType type) {
+ Types.MinorType minorType = Types.getMinorTypeForArrowType(type);
+ return minorType.name().toLowerCase();
+ }
+
+ private static Field avroSchemaToField(Schema schema, String name, AvroToArrowConfig config) {
+ return avroSchemaToField(schema, name, config, null);
+ }
+
+ private static Field avroSchemaToField(
+ Schema schema,
+ String name,
+ AvroToArrowConfig config,
+ Map<String, String> externalProps) {
+
+ final Type type = schema.getType();
+ final LogicalType logicalType = schema.getLogicalType();
+ final List<Field> children = new ArrayList<>();
+ final FieldType fieldType;
+
+ switch (type) {
+ case UNION:
+ for (int i = 0; i < schema.getTypes().size(); i++) {
+ Schema childSchema = schema.getTypes().get(i);
+ // Union child vector should use default name
+ children.add(avroSchemaToField(childSchema, null, config));
+ }
+ fieldType = createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps);
+ break;
+ case ARRAY:
+ Schema elementSchema = schema.getElementType();
+ children.add(avroSchemaToField(elementSchema, elementSchema.getName(), config));
+ fieldType = createFieldType(new ArrowType.List(), schema, externalProps);
+ break;
+ case MAP:
+ // MapVector internal struct field and key field should be non-nullable
+ FieldType keyFieldType = new FieldType(/*nullable=*/false, new ArrowType.Utf8(), /*dictionary=*/null);
+ Field keyField = new Field("key", keyFieldType, /*children=*/null);
+ Field valueField = avroSchemaToField(schema.getValueType(), "value", config);
+
+ FieldType structFieldType = new FieldType(false, new ArrowType.Struct(), /*dictionary=*/null);
+ Field structField = new Field("internal", structFieldType, Arrays.asList(keyField, valueField));
+ children.add(structField);
+ fieldType = createFieldType(new ArrowType.Map(/*keySorted=*/false), schema, externalProps);
+ break;
+ case RECORD:
+ final Set<String> skipFieldNames = config.getSkipFieldNames();
+ for (int i = 0; i < schema.getFields().size(); i++) {
+ final Schema.Field field = schema.getFields().get(i);
+ Schema childSchema = field.schema();
+ String fullChildName = String.format("%s.%s", name, field.name());
+ if (!skipFieldNames.contains(fullChildName)) {
+ final Map<String, String> extProps = new HashMap<>();
+ String doc = field.doc();
+ Set<String> aliases = field.aliases();
+ if (doc != null) {
+ extProps.put("doc", doc);
+ }
+ if (aliases != null) {
+ extProps.put("aliases", convertAliases(aliases));
+ }
+ children.add(avroSchemaToField(childSchema, fullChildName, config, extProps));
+ }
+ }
+ fieldType = createFieldType(new ArrowType.Struct(), schema, externalProps);
+ break;
+ case ENUM:
+ DictionaryProvider.MapDictionaryProvider provider = config.getProvider();
+ int current = provider.getDictionaryIds().size();
+ int enumCount = schema.getEnumSymbols().size();
+ ArrowType.Int indexType = DictionaryEncoder.getIndexType(enumCount);
+
+ fieldType = createFieldType(indexType, schema, externalProps,
+ new DictionaryEncoding(current, /*ordered=*/false, /*indexType=*/indexType));
+ break;
+
+ case STRING:
+ fieldType = createFieldType(new ArrowType.Utf8(), schema, externalProps);
+ break;
+ case FIXED:
+ final ArrowType fixedArrowType;
+ if (logicalType instanceof LogicalTypes.Decimal) {
+ fixedArrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType);
+ } else {
+ fixedArrowType = new ArrowType.FixedSizeBinary(schema.getFixedSize());
+ }
+ fieldType = createFieldType(fixedArrowType, schema, externalProps);
+ break;
+ case INT:
+ final ArrowType intArrowType;
+ if (logicalType instanceof LogicalTypes.Date) {
+ intArrowType = new ArrowType.Date(DateUnit.DAY);
+ } else if (logicalType instanceof LogicalTypes.TimeMillis) {
+ intArrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ } else {
+ intArrowType = new ArrowType.Int(32, /*signed=*/true);
+ }
+ fieldType = createFieldType(intArrowType, schema, externalProps);
+ break;
+ case BOOLEAN:
+ fieldType = createFieldType(new ArrowType.Bool(), schema, externalProps);
+ break;
+ case LONG:
+ final ArrowType longArrowType;
+ if (logicalType instanceof LogicalTypes.TimeMicros) {
+ longArrowType = new ArrowType.Time(TimeUnit.MICROSECOND, 64);
+ } else if (logicalType instanceof LogicalTypes.TimestampMillis) {
+ longArrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
+ } else if (logicalType instanceof LogicalTypes.TimestampMicros) {
+ longArrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null);
+ } else {
+ longArrowType = new ArrowType.Int(64, /*signed=*/true);
+ }
+ fieldType = createFieldType(longArrowType, schema, externalProps);
+ break;
+ case FLOAT:
+ fieldType = createFieldType(new ArrowType.FloatingPoint(SINGLE), schema, externalProps);
+ break;
+ case DOUBLE:
+ fieldType = createFieldType(new ArrowType.FloatingPoint(DOUBLE), schema, externalProps);
+ break;
+ case BYTES:
+ final ArrowType bytesArrowType;
+ if (logicalType instanceof LogicalTypes.Decimal) {
+ bytesArrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType);
+ } else {
+ bytesArrowType = new ArrowType.Binary();
+ }
+ fieldType = createFieldType(bytesArrowType, schema, externalProps);
+ break;
+ case NULL:
+ fieldType = createFieldType(ArrowType.Null.INSTANCE, schema, externalProps);
+ break;
+ default:
+ // no-op, shouldn't get here
+ throw new UnsupportedOperationException();
+ }
+
+ if (name == null) {
+ name = getDefaultFieldName(fieldType.getType());
+ }
+ return new Field(name, fieldType, children.size() == 0 ? null : children);
+ }
+
+ private static Consumer createArrayConsumer(Schema schema, String name, AvroToArrowConfig config,
+ FieldVector consumerVector) {
+
+ ListVector listVector;
+ if (consumerVector == null) {
+ final Field field = avroSchemaToField(schema, name, config);
+ listVector = (ListVector) field.createVector(config.getAllocator());
+ } else {
+ listVector = (ListVector) consumerVector;
+ }
+
+ FieldVector dataVector = listVector.getDataVector();
+
+ // create delegate
+ Schema childSchema = schema.getElementType();
+ Consumer delegate = createConsumer(childSchema, childSchema.getName(), config, dataVector);
+
+ return new AvroArraysConsumer(listVector, delegate);
+ }
+
+ private static Consumer createStructConsumer(Schema schema, String name, AvroToArrowConfig config,
+ FieldVector consumerVector) {
+
+ final Set<String> skipFieldNames = config.getSkipFieldNames();
+
+ StructVector structVector;
+ if (consumerVector == null) {
+ final Field field = avroSchemaToField(schema, name, config, createExternalProps(schema));
+ structVector = (StructVector) field.createVector(config.getAllocator());
+ } else {
+ structVector = (StructVector) consumerVector;
+ }
+
+ Consumer[] delegates = new Consumer[schema.getFields().size()];
+ int vectorIndex = 0;
+ for (int i = 0; i < schema.getFields().size(); i++) {
+ Schema.Field childField = schema.getFields().get(i);
+ Consumer delegate;
+ // use full name to distinguish fields have same names between parent and child fields.
+ final String fullChildName = String.format("%s.%s", name, childField.name());
+ if (skipFieldNames.contains(fullChildName)) {
+ delegate = createSkipConsumer(childField.schema());
+ } else {
+ delegate = createConsumer(childField.schema(), fullChildName, config,
+ structVector.getChildrenFromFields().get(vectorIndex++));
+ }
+
+ delegates[i] = delegate;
+ }
+
+ return new AvroStructConsumer(structVector, delegates);
+
+ }
+
+ private static Consumer createEnumConsumer(Schema schema, String name, AvroToArrowConfig config,
+ FieldVector consumerVector) {
+
+ BaseIntVector indexVector;
+ if (consumerVector == null) {
+ final Field field = avroSchemaToField(schema, name, config, createExternalProps(schema));
+ indexVector = (BaseIntVector) field.createVector(config.getAllocator());
+ } else {
+ indexVector = (BaseIntVector) consumerVector;
+ }
+
+ final int valueCount = schema.getEnumSymbols().size();
+ VarCharVector dictVector = new VarCharVector(name, config.getAllocator());
+ dictVector.allocateNewSafe();
+ dictVector.setValueCount(valueCount);
+ for (int i = 0; i < valueCount; i++) {
+ dictVector.set(i, schema.getEnumSymbols().get(i).getBytes(StandardCharsets.UTF_8));
+ }
+ Dictionary dictionary =
+ new Dictionary(dictVector, indexVector.getField().getDictionary());
+ config.getProvider().put(dictionary);
+
+ return new AvroEnumConsumer(indexVector);
+
+ }
+
+ private static Consumer createMapConsumer(Schema schema, String name, AvroToArrowConfig config,
+ FieldVector consumerVector) {
+
+ MapVector mapVector;
+ if (consumerVector == null) {
+ final Field field = avroSchemaToField(schema, name, config);
+ mapVector = (MapVector) field.createVector(config.getAllocator());
+ } else {
+ mapVector = (MapVector) consumerVector;
+ }
+
+ // create delegate struct consumer
+ StructVector structVector = (StructVector) mapVector.getDataVector();
+
+ // keys in avro map are always assumed to be strings.
+ Consumer keyConsumer = new AvroStringConsumer(
+ (VarCharVector) structVector.getChildrenFromFields().get(0));
+ Consumer valueConsumer = createConsumer(schema.getValueType(), schema.getValueType().getName(),
+ config, structVector.getChildrenFromFields().get(1));
+
+ AvroStructConsumer internalConsumer =
+ new AvroStructConsumer(structVector, new Consumer[] {keyConsumer, valueConsumer});
+
+ return new AvroMapConsumer(mapVector, internalConsumer);
+ }
+
+ private static Consumer createUnionConsumer(Schema schema, String name, AvroToArrowConfig config,
+ FieldVector consumerVector) {
+ final int size = schema.getTypes().size();
+
+ final boolean nullable = schema.getTypes().stream().anyMatch(t -> t.getType() == Type.NULL);
+
+ UnionVector unionVector;
+ if (consumerVector == null) {
+ final Field field = avroSchemaToField(schema, name, config);
+ unionVector = (UnionVector) field.createVector(config.getAllocator());
+ } else {
+ unionVector = (UnionVector) consumerVector;
+ }
+
+ List<FieldVector> childVectors = unionVector.getChildrenFromFields();
+
+ Consumer[] delegates = new Consumer[size];
+ Types.MinorType[] types = new Types.MinorType[size];
+
+ for (int i = 0; i < size; i++) {
+ FieldVector child = childVectors.get(i);
+ Schema subSchema = schema.getTypes().get(i);
+ Consumer delegate = createConsumer(subSchema, subSchema.getName(), nullable, config, child);
+ delegates[i] = delegate;
+ types[i] = child.getMinorType();
+ }
+ return new AvroUnionsConsumer(unionVector, delegates, types);
+ }
+
+ /**
+ * Read data from {@link Decoder} and generate a {@link VectorSchemaRoot}.
+ * @param schema avro schema
+ * @param decoder avro decoder to read data from
+ */
+ static VectorSchemaRoot avroToArrowVectors(
+ Schema schema,
+ Decoder decoder,
+ AvroToArrowConfig config)
+ throws IOException {
+
+ List<FieldVector> vectors = new ArrayList<>();
+ List<Consumer> consumers = new ArrayList<>();
+ final Set<String> skipFieldNames = config.getSkipFieldNames();
+
+ Schema.Type type = schema.getType();
+ if (type == Type.RECORD) {
+ for (Schema.Field field : schema.getFields()) {
+ if (skipFieldNames.contains(field.name())) {
+ consumers.add(createSkipConsumer(field.schema()));
+ } else {
+ Consumer consumer = createConsumer(field.schema(), field.name(), config);
+ consumers.add(consumer);
+ vectors.add(consumer.getVector());
+ }
+ }
+ } else {
+ Consumer consumer = createConsumer(schema, "", config);
+ consumers.add(consumer);
+ vectors.add(consumer.getVector());
+ }
+
+ long validConsumerCount = consumers.stream().filter(c -> !c.skippable()).count();
+ Preconditions.checkArgument(vectors.size() == validConsumerCount,
+ "vectors size not equals consumers size.");
+
+ List<Field> fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList());
+
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 0);
+
+ CompositeAvroConsumer compositeConsumer = new CompositeAvroConsumer(consumers);
+
+ int valueCount = 0;
+ try {
+ while (true) {
+ ValueVectorUtility.ensureCapacity(root, valueCount + 1);
+ compositeConsumer.consume(decoder);
+ valueCount++;
+ }
+ } catch (EOFException eof) {
+ // reach the end of encoder stream.
+ root.setRowCount(valueCount);
+ } catch (Exception e) {
+ compositeConsumer.close();
+ throw new UnsupportedOperationException("Error occurs while consume process.", e);
+ }
+
+ return root;
+ }
+
+ private static Map<String, String> getMetaData(Schema schema) {
+ Map<String, String> metadata = new HashMap<>();
+ schema.getObjectProps().forEach((k, v) -> metadata.put(k, v.toString()));
+ return metadata;
+ }
+
+ private static Map<String, String> getMetaData(Schema schema, Map<String, String> externalProps) {
+ Map<String, String> metadata = getMetaData(schema);
+ if (externalProps != null) {
+ metadata.putAll(externalProps);
+ }
+ return metadata;
+ }
+
+ /**
+ * Parse avro attributes and convert them to metadata.
+ */
+ private static Map<String, String> createExternalProps(Schema schema) {
+ final Map<String, String> extProps = new HashMap<>();
+ String doc = schema.getDoc();
+ Set<String> aliases = schema.getAliases();
+ if (doc != null) {
+ extProps.put("doc", doc);
+ }
+ if (aliases != null) {
+ extProps.put("aliases", convertAliases(aliases));
+ }
+ return extProps;
+ }
+
+ private static FieldType createFieldType(ArrowType arrowType, Schema schema, Map<String, String> externalProps) {
+ return createFieldType(arrowType, schema, externalProps, /*dictionary=*/null);
+ }
+
+ private static FieldType createFieldType(
+ ArrowType arrowType,
+ Schema schema,
+ Map<String, String> externalProps,
+ DictionaryEncoding dictionary) {
+
+ return new FieldType(/*nullable=*/false, arrowType, dictionary,
+ getMetaData(schema, externalProps));
+ }
+
+ private static String convertAliases(Set<String> aliases) {
+ JsonStringArrayList jsonList = new JsonStringArrayList();
+ aliases.stream().forEach(a -> jsonList.add(a));
+ return jsonList.toString();
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowVectorIterator.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowVectorIterator.java
new file mode 100644
index 000000000..1faa7595c
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowVectorIterator.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import java.io.EOFException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.consumers.CompositeAvroConsumer;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+import org.apache.avro.Schema;
+import org.apache.avro.io.Decoder;
+
+/**
+ * VectorSchemaRoot iterator for partially converting avro data.
+ */
+public class AvroToArrowVectorIterator implements Iterator<VectorSchemaRoot>, AutoCloseable {
+
+ public static final int NO_LIMIT_BATCH_SIZE = -1;
+ public static final int DEFAULT_BATCH_SIZE = 1024;
+
+ private final Decoder decoder;
+ private final Schema schema;
+
+ private final AvroToArrowConfig config;
+
+ private CompositeAvroConsumer compositeConsumer;
+
+ private org.apache.arrow.vector.types.pojo.Schema rootSchema;
+
+ private VectorSchemaRoot nextBatch;
+
+ private final int targetBatchSize;
+
+ /**
+ * Construct an instance.
+ */
+ private AvroToArrowVectorIterator(
+ Decoder decoder,
+ Schema schema,
+ AvroToArrowConfig config) {
+
+ this.decoder = decoder;
+ this.schema = schema;
+ this.config = config;
+ this.targetBatchSize = config.getTargetBatchSize();
+
+ }
+
+ /**
+ * Create a ArrowVectorIterator to partially convert data.
+ */
+ public static AvroToArrowVectorIterator create(
+ Decoder decoder,
+ Schema schema,
+ AvroToArrowConfig config) {
+
+ AvroToArrowVectorIterator iterator = new AvroToArrowVectorIterator(decoder, schema, config);
+ try {
+ iterator.initialize();
+ return iterator;
+ } catch (Exception e) {
+ iterator.close();
+ throw new RuntimeException("Error occurs while creating iterator.", e);
+ }
+ }
+
+ private void initialize() {
+ // create consumers
+ compositeConsumer = AvroToArrowUtils.createCompositeConsumer(schema, config);
+ List<FieldVector> vectors = new ArrayList<>();
+ compositeConsumer.getConsumers().forEach(c -> vectors.add(c.getVector()));
+ List<Field> fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList());
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 0);
+ rootSchema = root.getSchema();
+
+ load(root);
+ }
+
+ private void consumeData(VectorSchemaRoot root) {
+ int readRowCount = 0;
+ try {
+ while ((targetBatchSize == NO_LIMIT_BATCH_SIZE || readRowCount < targetBatchSize)) {
+ compositeConsumer.consume(decoder);
+ readRowCount++;
+ }
+
+ if (targetBatchSize == NO_LIMIT_BATCH_SIZE) {
+ while (true) {
+ ValueVectorUtility.ensureCapacity(root, readRowCount + 1);
+ compositeConsumer.consume(decoder);
+ readRowCount++;
+ }
+ } else {
+ while (readRowCount < targetBatchSize) {
+ compositeConsumer.consume(decoder);
+ readRowCount++;
+ }
+ }
+
+ root.setRowCount(readRowCount);
+ } catch (EOFException eof) {
+ // reach the end of encoder stream.
+ root.setRowCount(readRowCount);
+ } catch (Exception e) {
+ compositeConsumer.close();
+ throw new RuntimeException("Error occurs while consuming data.", e);
+ }
+ }
+
+ // Loads the next schema root or null if no more rows are available.
+ private void load(VectorSchemaRoot root) {
+ final int targetBatchSize = config.getTargetBatchSize();
+ if (targetBatchSize != NO_LIMIT_BATCH_SIZE) {
+ ValueVectorUtility.preAllocate(root, targetBatchSize);
+ }
+
+ long validConsumerCount = compositeConsumer.getConsumers().stream().filter(c ->
+ !c.skippable()).count();
+ Preconditions.checkArgument(root.getFieldVectors().size() == validConsumerCount,
+ "Schema root vectors size not equals to consumers size.");
+
+ compositeConsumer.resetConsumerVectors(root);
+
+ // consume data
+ consumeData(root);
+
+ if (root.getRowCount() == 0) {
+ root.close();
+ nextBatch = null;
+ } else {
+ nextBatch = root;
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ return nextBatch != null;
+ }
+
+ /**
+ * Gets the next vector. The user is responsible for freeing its resources.
+ */
+ public VectorSchemaRoot next() {
+ Preconditions.checkArgument(hasNext());
+ VectorSchemaRoot returned = nextBatch;
+ try {
+ load(VectorSchemaRoot.create(rootSchema, config.getAllocator()));
+ } catch (Exception e) {
+ returned.close();
+ throw new RuntimeException("Error occurs while getting next schema root.", e);
+ }
+ return returned;
+ }
+
+ /**
+ * Clean up resources.
+ */
+ public void close() {
+ if (nextBatch != null) {
+ nextBatch.close();
+ }
+ compositeConsumer.close();
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroArraysConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroArraysConsumer.java
new file mode 100644
index 000000000..b9d0f84cf
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroArraysConsumer.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume array type values from avro decoder.
+ * Write the data to {@link ListVector}.
+ */
+public class AvroArraysConsumer extends BaseAvroConsumer<ListVector> {
+
+ private final Consumer delegate;
+
+ /**
+ * Instantiate a ArrayConsumer.
+ */
+ public AvroArraysConsumer(ListVector vector, Consumer delegate) {
+ super(vector);
+ this.delegate = delegate;
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+
+ vector.startNewValue(currentIndex);
+ long totalCount = 0;
+ for (long count = decoder.readArrayStart(); count != 0; count = decoder.arrayNext()) {
+ totalCount += count;
+ ensureInnerVectorCapacity(totalCount);
+ for (int element = 0; element < count; element++) {
+ delegate.consume(decoder);
+ }
+ }
+ vector.endValue(currentIndex, (int) totalCount);
+ currentIndex++;
+ }
+
+ @Override
+ public void close() throws Exception {
+ super.close();
+ delegate.close();
+ }
+
+ @Override
+ public boolean resetValueVector(ListVector vector) {
+ this.delegate.resetValueVector(vector.getDataVector());
+ return super.resetValueVector(vector);
+ }
+
+ void ensureInnerVectorCapacity(long targetCapacity) {
+ while (vector.getDataVector().getValueCapacity() < targetCapacity) {
+ vector.getDataVector().reAlloc();
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java
new file mode 100644
index 000000000..4ca5f2445
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.BitVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume boolean type values from avro decoder.
+ * Write the data to {@link BitVector}.
+ */
+public class AvroBooleanConsumer extends BaseAvroConsumer<BitVector> {
+
+ /**
+ * Instantiate a AvroBooleanConsumer.
+ */
+ public AvroBooleanConsumer(BitVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex, decoder.readBoolean() ? 1 : 0);
+ currentIndex++;
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java
new file mode 100644
index 000000000..eede68ebd
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume bytes type values from avro decoder.
+ * Write the data to {@link VarBinaryVector}.
+ */
+public class AvroBytesConsumer extends BaseAvroConsumer<VarBinaryVector> {
+
+ private ByteBuffer cacheBuffer;
+
+ /**
+ * Instantiate a AvroBytesConsumer.
+ */
+ public AvroBytesConsumer(VarBinaryVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ // cacheBuffer is initialized null and create in the first consume,
+ // if its capacity < size to read, decoder will create a new one with new capacity.
+ cacheBuffer = decoder.readBytes(cacheBuffer);
+ vector.setSafe(currentIndex, cacheBuffer, 0, cacheBuffer.limit());
+ currentIndex++;
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java
new file mode 100644
index 000000000..356707a14
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume double type values from avro decoder.
+ * Write the data to {@link Float8Vector}.
+ */
+public class AvroDoubleConsumer extends BaseAvroConsumer<Float8Vector> {
+
+ /**
+ * Instantiate a AvroDoubleConsumer.
+ */
+ public AvroDoubleConsumer(Float8Vector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex++, decoder.readDouble());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroEnumConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroEnumConsumer.java
new file mode 100644
index 000000000..2f4443b74
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroEnumConsumer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume enum type values from avro decoder.
+ * Write the data to {@link IntVector}.
+ */
+public class AvroEnumConsumer extends BaseAvroConsumer<BaseIntVector> {
+
+ /**
+ * Instantiate a AvroEnumConsumer.
+ */
+ public AvroEnumConsumer(BaseIntVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.setWithPossibleTruncate(currentIndex++, decoder.readEnum());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFixedConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFixedConsumer.java
new file mode 100644
index 000000000..a065466e3
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFixedConsumer.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume fixed type values from avro decoder.
+ * Write the data to {@link org.apache.arrow.vector.FixedSizeBinaryVector}.
+ */
+public class AvroFixedConsumer extends BaseAvroConsumer<FixedSizeBinaryVector> {
+
+ private final byte[] reuseBytes;
+
+ /**
+ * Instantiate a AvroFixedConsumer.
+ */
+ public AvroFixedConsumer(FixedSizeBinaryVector vector, int size) {
+ super(vector);
+ reuseBytes = new byte[size];
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ decoder.readFixed(reuseBytes);
+ vector.setSafe(currentIndex++, reuseBytes);
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java
new file mode 100644
index 000000000..c8de4a21a
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume float type values from avro decoder.
+ * Write the data to {@link Float4Vector}.
+ */
+public class AvroFloatConsumer extends BaseAvroConsumer<Float4Vector> {
+
+ /**
+ * Instantiate a AvroFloatConsumer.
+ */
+ public AvroFloatConsumer(Float4Vector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex++, decoder.readFloat());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java
new file mode 100644
index 000000000..bc8d4de78
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.IntVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume int type values from avro decoder.
+ * Write the data to {@link IntVector}.
+ */
+public class AvroIntConsumer extends BaseAvroConsumer<IntVector> {
+
+ /**
+ * Instantiate a AvroIntConsumer.
+ */
+ public AvroIntConsumer(IntVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex++, decoder.readInt());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java
new file mode 100644
index 000000000..b9016c58f
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume long type values from avro decoder.
+ * Write the data to {@link BigIntVector}.
+ */
+public class AvroLongConsumer extends BaseAvroConsumer<BigIntVector> {
+
+ /**
+ * Instantiate a AvroLongConsumer.
+ */
+ public AvroLongConsumer(BigIntVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex++, decoder.readLong());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroMapConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroMapConsumer.java
new file mode 100644
index 000000000..b8e8bd585
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroMapConsumer.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume map type values from avro decoder.
+ * Write the data to {@link MapVector}.
+ */
+public class AvroMapConsumer extends BaseAvroConsumer<MapVector> {
+
+ private final Consumer delegate;
+
+ /**
+ * Instantiate a AvroMapConsumer.
+ */
+ public AvroMapConsumer(MapVector vector, Consumer delegate) {
+ super(vector);
+ this.delegate = delegate;
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+
+ vector.startNewValue(currentIndex);
+ long totalCount = 0;
+ for (long count = decoder.readMapStart(); count != 0; count = decoder.mapNext()) {
+ totalCount += count;
+ ensureInnerVectorCapacity(totalCount);
+ for (int element = 0; element < count; element++) {
+ delegate.consume(decoder);
+ }
+ }
+ vector.endValue(currentIndex, (int) totalCount);
+ currentIndex++;
+ }
+
+ @Override
+ public void close() throws Exception {
+ super.close();
+ delegate.close();
+ }
+
+ @Override
+ public boolean resetValueVector(MapVector vector) {
+ this.delegate.resetValueVector(vector.getDataVector());
+ return super.resetValueVector(vector);
+ }
+
+ void ensureInnerVectorCapacity(long targetCapacity) {
+ StructVector innerVector = (StructVector) vector.getDataVector();
+ for (FieldVector v : innerVector.getChildrenFromFields()) {
+ while (v.getValueCapacity() < targetCapacity) {
+ v.reAlloc();
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroNullConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroNullConsumer.java
new file mode 100644
index 000000000..64768008a
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroNullConsumer.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.NullVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume null type values from avro decoder.
+ * Corresponding to {@link org.apache.arrow.vector.NullVector}.
+ */
+public class AvroNullConsumer extends BaseAvroConsumer<NullVector> {
+
+ public AvroNullConsumer(NullVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ currentIndex++;
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java
new file mode 100644
index 000000000..10fe234ac
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume string type values from avro decoder.
+ * Write the data to {@link VarCharVector}.
+ */
+public class AvroStringConsumer extends BaseAvroConsumer<VarCharVector> {
+
+ private ByteBuffer cacheBuffer;
+
+ /**
+ * Instantiate a AvroStringConsumer.
+ */
+ public AvroStringConsumer(VarCharVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ // cacheBuffer is initialized null and create in the first consume,
+ // if its capacity < size to read, decoder will create a new one with new capacity.
+ cacheBuffer = decoder.readBytes(cacheBuffer);
+ vector.setSafe(currentIndex++, cacheBuffer, 0, cacheBuffer.limit());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStructConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStructConsumer.java
new file mode 100644
index 000000000..792d01ee5
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStructConsumer.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume nested record type values from avro decoder.
+ * Write the data to {@link org.apache.arrow.vector.complex.StructVector}.
+ */
+public class AvroStructConsumer extends BaseAvroConsumer<StructVector> {
+
+ private final Consumer[] delegates;
+
+ /**
+ * Instantiate a AvroStructConsumer.
+ */
+ public AvroStructConsumer(StructVector vector, Consumer[] delegates) {
+ super(vector);
+ this.delegates = delegates;
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+
+ ensureInnerVectorCapacity(currentIndex + 1);
+ for (int i = 0; i < delegates.length; i++) {
+ delegates[i].consume(decoder);
+ }
+ vector.setIndexDefined(currentIndex);
+ currentIndex++;
+
+ }
+
+ @Override
+ public void close() throws Exception {
+ super.close();
+ AutoCloseables.close(delegates);
+ }
+
+ @Override
+ public boolean resetValueVector(StructVector vector) {
+ for (int i = 0; i < delegates.length; i++) {
+ delegates[i].resetValueVector(vector.getChildrenFromFields().get(i));
+ }
+ return super.resetValueVector(vector);
+ }
+
+ void ensureInnerVectorCapacity(long targetCapacity) {
+ for (FieldVector v : vector.getChildrenFromFields()) {
+ while (v.getValueCapacity() < targetCapacity) {
+ v.reAlloc();
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroUnionsConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroUnionsConsumer.java
new file mode 100644
index 000000000..c0bb0200f
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroUnionsConsumer.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume unions type values from avro decoder.
+ * Write the data to {@link org.apache.arrow.vector.complex.UnionVector}.
+ */
+public class AvroUnionsConsumer extends BaseAvroConsumer<UnionVector> {
+
+ private Consumer[] delegates;
+ private Types.MinorType[] types;
+
+ /**
+ * Instantiate an AvroUnionConsumer.
+ */
+ public AvroUnionsConsumer(UnionVector vector, Consumer[] delegates, Types.MinorType[] types) {
+
+ super(vector);
+ this.delegates = delegates;
+ this.types = types;
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ int fieldIndex = decoder.readInt();
+
+ ensureInnerVectorCapacity(currentIndex + 1, fieldIndex);
+ Consumer delegate = delegates[fieldIndex];
+
+ vector.setType(currentIndex, types[fieldIndex]);
+ // In UnionVector we need to set sub vector writer position before consume a value
+ // because in the previous iterations we might not have written to the specific union sub vector.
+ delegate.setPosition(currentIndex);
+ delegate.consume(decoder);
+
+ currentIndex++;
+ }
+
+ @Override
+ public void close() throws Exception {
+ super.close();
+ AutoCloseables.close(delegates);
+ }
+
+ @Override
+ public boolean resetValueVector(UnionVector vector) {
+ for (int i = 0; i < delegates.length; i++) {
+ delegates[i].resetValueVector(vector.getChildrenFromFields().get(i));
+ }
+ return super.resetValueVector(vector);
+ }
+
+ void ensureInnerVectorCapacity(long targetCapacity, int fieldIndex) {
+ ValueVector fieldVector = vector.getChildrenFromFields().get(fieldIndex);
+ if (fieldVector.getMinorType() == Types.MinorType.NULL) {
+ return;
+ }
+ while (fieldVector.getValueCapacity() < targetCapacity) {
+ fieldVector.reAlloc();
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/BaseAvroConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/BaseAvroConsumer.java
new file mode 100644
index 000000000..303be8e50
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/BaseAvroConsumer.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import org.apache.arrow.vector.FieldVector;
+
+/**
+ * Base class for non-skippable avro consumers.
+ * @param <T> vector type.
+ */
+public abstract class BaseAvroConsumer<T extends FieldVector> implements Consumer<T> {
+
+ protected T vector;
+ protected int currentIndex;
+
+ /**
+ * Constructs a base avro consumer.
+ * @param vector the vector to consume.
+ */
+ public BaseAvroConsumer(T vector) {
+ this.vector = vector;
+ }
+
+ @Override
+ public void addNull() {
+ currentIndex++;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ currentIndex = index;
+ }
+
+ @Override
+ public FieldVector getVector() {
+ return vector;
+ }
+
+ @Override
+ public void close() throws Exception {
+ vector.close();
+ }
+
+ @Override
+ public boolean resetValueVector(T vector) {
+ this.vector = vector;
+ this.currentIndex = 0;
+ return true;
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/CompositeAvroConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/CompositeAvroConsumer.java
new file mode 100644
index 000000000..af476d27c
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/CompositeAvroConsumer.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Composite consumer which hold all consumers.
+ * It manages the consume and cleanup process.
+ */
+public class CompositeAvroConsumer implements AutoCloseable {
+
+ private final List<Consumer> consumers;
+
+ public List<Consumer> getConsumers() {
+ return consumers;
+ }
+
+ public CompositeAvroConsumer(List<Consumer> consumers) {
+ this.consumers = consumers;
+ }
+
+ /**
+ * Consume decoder data.
+ */
+ public void consume(Decoder decoder) throws IOException {
+ for (Consumer consumer : consumers) {
+ consumer.consume(decoder);
+ }
+ }
+
+ /**
+ * Reset vector of consumers with the given {@link VectorSchemaRoot}.
+ */
+ public void resetConsumerVectors(VectorSchemaRoot root) {
+ int index = 0;
+ for (Consumer consumer : consumers) {
+ if (consumer.resetValueVector(root.getFieldVectors().get(index))) {
+ index++;
+ }
+ }
+ }
+
+ @Override
+ public void close() {
+ // clean up
+ try {
+ AutoCloseables.close(consumers);
+ } catch (Exception e) {
+ throw new RuntimeException("Error occurs in close.", e);
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java
new file mode 100644
index 000000000..8c4ee9a96
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Interface that is used to consume values from avro decoder.
+ * @param <T> The vector within consumer or its delegate, used for partially consume purpose.
+ */
+public interface Consumer<T extends FieldVector> extends AutoCloseable {
+
+ /**
+ * Consume a specific type value from avro decoder and write it to vector.
+ * @param decoder avro decoder to read data
+ * @throws IOException on error
+ */
+ void consume(Decoder decoder) throws IOException;
+
+ /**
+ * Add null value to vector by making writer position + 1.
+ */
+ void addNull();
+
+ /**
+ * Set the position to write value into vector.
+ */
+ void setPosition(int index);
+
+ /**
+ * Get the vector within the consumer.
+ */
+ FieldVector getVector();
+
+ /**
+ * Close this consumer when occurs exception to avoid potential leak.
+ */
+ void close() throws Exception;
+
+ /**
+ * Reset the vector within consumer for partial read purpose.
+ * @return true if reset is successful, false if reset is not needed.
+ */
+ boolean resetValueVector(T vector);
+
+ /**
+ * Indicates whether the consumer is type of {@link SkipConsumer}.
+ */
+ default boolean skippable() {
+ return false;
+ }
+
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipConsumer.java
new file mode 100644
index 000000000..94c5b339d
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipConsumer.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which skip (throw away) data from the decoder.
+ */
+public class SkipConsumer implements Consumer {
+
+ private final SkipFunction skipFunction;
+
+ public SkipConsumer(SkipFunction skipFunction) {
+ this.skipFunction = skipFunction;
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ skipFunction.apply(decoder);
+ }
+
+ @Override
+ public void addNull() {
+ }
+
+ @Override
+ public void setPosition(int index) {
+ }
+
+ @Override
+ public FieldVector getVector() {
+ return null;
+ }
+
+ @Override
+ public void close() throws Exception {
+ }
+
+ @Override
+ public boolean resetValueVector(FieldVector vector) {
+ return false;
+ }
+
+ @Override
+ public boolean skippable() {
+ return true;
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipFunction.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipFunction.java
new file mode 100644
index 000000000..61938916a
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipFunction.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers;
+
+import java.io.IOException;
+
+import org.apache.avro.io.Decoder;
+
+/**
+ * Adapter function to skip (throw away) data from the decoder.
+ */
+@FunctionalInterface
+public interface SkipFunction {
+ void apply(Decoder decoder) throws IOException;
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDateConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDateConsumer.java
new file mode 100644
index 000000000..3aa8970d9
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDateConsumer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers.logical;
+
+import java.io.IOException;
+
+import org.apache.arrow.consumers.BaseAvroConsumer;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume date type values from avro decoder.
+ * Write the data to {@link DateDayVector}.
+ */
+public class AvroDateConsumer extends BaseAvroConsumer<DateDayVector> {
+
+ /**
+ * Instantiate a AvroDateConsumer.
+ */
+ public AvroDateConsumer(DateDayVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex++, decoder.readInt());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDecimalConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDecimalConsumer.java
new file mode 100644
index 000000000..24d73cf82
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDecimalConsumer.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers.logical;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.arrow.consumers.BaseAvroConsumer;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume decimal type values from avro decoder.
+ * Write the data to {@link DecimalVector}.
+ */
+public abstract class AvroDecimalConsumer extends BaseAvroConsumer<DecimalVector> {
+
+ /**
+ * Instantiate a AvroDecimalConsumer.
+ */
+ public AvroDecimalConsumer(DecimalVector vector) {
+ super(vector);
+ }
+
+ /**
+ * Consumer for decimal logical type with original bytes type.
+ */
+ public static class BytesDecimalConsumer extends AvroDecimalConsumer {
+
+ private ByteBuffer cacheBuffer;
+
+ /**
+ * Instantiate a BytesDecimalConsumer.
+ */
+ public BytesDecimalConsumer(DecimalVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ cacheBuffer = decoder.readBytes(cacheBuffer);
+ byte[] bytes = new byte[cacheBuffer.limit()];
+ Preconditions.checkArgument(bytes.length <= 16, "Decimal bytes length should <= 16.");
+ cacheBuffer.get(bytes);
+ vector.setBigEndian(currentIndex++, bytes);
+ }
+
+ }
+
+ /**
+ * Consumer for decimal logical type with original fixed type.
+ */
+ public static class FixedDecimalConsumer extends AvroDecimalConsumer {
+
+ private byte[] reuseBytes;
+
+ /**
+ * Instantiate a FixedDecimalConsumer.
+ */
+ public FixedDecimalConsumer(DecimalVector vector, int size) {
+ super(vector);
+ Preconditions.checkArgument(size <= 16, "Decimal bytes length should <= 16.");
+ reuseBytes = new byte[size];
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ decoder.readFixed(reuseBytes);
+ vector.setBigEndian(currentIndex++, reuseBytes);
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMicroConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMicroConsumer.java
new file mode 100644
index 000000000..e68ba158f
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMicroConsumer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers.logical;
+
+import java.io.IOException;
+
+import org.apache.arrow.consumers.BaseAvroConsumer;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume date time-micro values from avro decoder.
+ * Write the data to {@link TimeMicroVector}.
+ */
+public class AvroTimeMicroConsumer extends BaseAvroConsumer<TimeMicroVector> {
+
+ /**
+ * Instantiate a AvroTimeMicroConsumer.
+ */
+ public AvroTimeMicroConsumer(TimeMicroVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex++, decoder.readLong());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMillisConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMillisConsumer.java
new file mode 100644
index 000000000..f76186fc3
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMillisConsumer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers.logical;
+
+import java.io.IOException;
+
+import org.apache.arrow.consumers.BaseAvroConsumer;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume date time-millis values from avro decoder.
+ * Write the data to {@link TimeMilliVector}.
+ */
+public class AvroTimeMillisConsumer extends BaseAvroConsumer<TimeMilliVector> {
+
+ /**
+ * Instantiate a AvroTimeMilliConsumer.
+ */
+ public AvroTimeMillisConsumer(TimeMilliVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex++, decoder.readInt());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMicrosConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMicrosConsumer.java
new file mode 100644
index 000000000..82da0e805
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMicrosConsumer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers.logical;
+
+import java.io.IOException;
+
+import org.apache.arrow.consumers.BaseAvroConsumer;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume date timestamp-micro values from avro decoder.
+ * Write the data to {@link TimeStampMicroVector}.
+ */
+public class AvroTimestampMicrosConsumer extends BaseAvroConsumer<TimeStampMicroVector> {
+
+ /**
+ * Instantiate a AvroTimestampMicroConsumer.
+ */
+ public AvroTimestampMicrosConsumer(TimeStampMicroVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex++, decoder.readLong());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMillisConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMillisConsumer.java
new file mode 100644
index 000000000..159f49e14
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMillisConsumer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.consumers.logical;
+
+import java.io.IOException;
+
+import org.apache.arrow.consumers.BaseAvroConsumer;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.avro.io.Decoder;
+
+/**
+ * Consumer which consume date timestamp-millis values from avro decoder.
+ * Write the data to {@link TimeStampMilliVector}.
+ */
+public class AvroTimestampMillisConsumer extends BaseAvroConsumer<TimeStampMilliVector> {
+
+ /**
+ * Instantiate a AvroTimestampMillisConsumer.
+ */
+ public AvroTimestampMillisConsumer(TimeStampMilliVector vector) {
+ super(vector);
+ }
+
+ @Override
+ public void consume(Decoder decoder) throws IOException {
+ vector.set(currentIndex++, decoder.readLong());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroLogicalTypesTest.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroLogicalTypesTest.java
new file mode 100644
index 000000000..050a50dda
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroLogicalTypesTest.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import static junit.framework.TestCase.assertNull;
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.avro.Conversions;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericFixed;
+import org.junit.Test;
+
+public class AvroLogicalTypesTest extends AvroTestBase {
+
+ @Test
+ public void testTimestampMicros() throws Exception {
+ Schema schema = getSchema("logical/test_timestamp_micros.avsc");
+
+ List<Long> data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L);
+ List<LocalDateTime> expected = Arrays.asList(
+ DateUtility.getLocalDateTimeFromEpochMicro(10000),
+ DateUtility.getLocalDateTimeFromEpochMicro(20000),
+ DateUtility.getLocalDateTimeFromEpochMicro(30000),
+ DateUtility.getLocalDateTimeFromEpochMicro(40000),
+ DateUtility.getLocalDateTimeFromEpochMicro(50000)
+ );
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(expected, vector);
+ }
+
+ @Test
+ public void testTimestampMillis() throws Exception {
+ Schema schema = getSchema("logical/test_timestamp_millis.avsc");
+
+ List<Long> data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L);
+ List<LocalDateTime> expected = Arrays.asList(
+ DateUtility.getLocalDateTimeFromEpochMilli(10000),
+ DateUtility.getLocalDateTimeFromEpochMilli(20000),
+ DateUtility.getLocalDateTimeFromEpochMilli(30000),
+ DateUtility.getLocalDateTimeFromEpochMilli(40000),
+ DateUtility.getLocalDateTimeFromEpochMilli(50000)
+ );
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(expected, vector);
+ }
+
+ @Test
+ public void testTimeMicros() throws Exception {
+ Schema schema = getSchema("logical/test_time_micros.avsc");
+
+ List<Long> data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L);
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(data, vector);
+ }
+
+ @Test
+ public void testTimeMillis() throws Exception {
+ Schema schema = getSchema("logical/test_time_millis.avsc");
+
+ List<Integer> data = Arrays.asList(100, 200, 300, 400, 500);
+ List<LocalDateTime> expected = Arrays.asList(
+ DateUtility.getLocalDateTimeFromEpochMilli(100),
+ DateUtility.getLocalDateTimeFromEpochMilli(200),
+ DateUtility.getLocalDateTimeFromEpochMilli(300),
+ DateUtility.getLocalDateTimeFromEpochMilli(400),
+ DateUtility.getLocalDateTimeFromEpochMilli(500)
+ );
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(expected, vector);
+ }
+
+ @Test
+ public void testDate() throws Exception {
+ Schema schema = getSchema("logical/test_date.avsc");
+
+ List<Integer> data = Arrays.asList(100, 200, 300, 400, 500);
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(data, vector);
+ }
+
+ @Test
+ public void testDecimalWithOriginalBytes() throws Exception {
+ Schema schema = getSchema("logical/test_decimal_with_original_bytes.avsc");
+ List<ByteBuffer> data = new ArrayList<>();
+ List<BigDecimal> expected = new ArrayList<>();
+
+ Conversions.DecimalConversion conversion = new Conversions.DecimalConversion();
+
+ for (int i = 0; i < 5; i++) {
+ BigDecimal value = new BigDecimal(i * i).setScale(2);
+ ByteBuffer buffer = conversion.toBytes(value, schema, schema.getLogicalType());
+ data.add(buffer);
+ expected.add(value);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+ checkPrimitiveResult(expected, vector);
+
+ }
+
+ @Test
+ public void testDecimalWithOriginalFixed() throws Exception {
+ Schema schema = getSchema("logical/test_decimal_with_original_fixed.avsc");
+
+ List<GenericFixed> data = new ArrayList<>();
+ List<BigDecimal> expected = new ArrayList<>();
+
+ Conversions.DecimalConversion conversion = new Conversions.DecimalConversion();
+
+ for (int i = 0; i < 5; i++) {
+ BigDecimal value = new BigDecimal(i * i).setScale(2);
+ GenericFixed fixed = conversion.toFixed(value, schema, schema.getLogicalType());
+ data.add(fixed);
+ expected.add(value);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+ checkPrimitiveResult(expected, vector);
+ }
+
+ @Test
+ public void testInvalidDecimalPrecision() throws Exception {
+ Schema schema = getSchema("logical/test_decimal_invalid1.avsc");
+ List<ByteBuffer> data = new ArrayList<>();
+
+ Conversions.DecimalConversion conversion = new Conversions.DecimalConversion();
+
+ for (int i = 0; i < 5; i++) {
+ BigDecimal value = new BigDecimal(i * i).setScale(2);
+ ByteBuffer buffer = conversion.toBytes(value, schema, schema.getLogicalType());
+ data.add(buffer);
+ }
+
+ IllegalArgumentException e = assertThrows(IllegalArgumentException.class,
+ () -> writeAndRead(schema, data));
+ assertTrue(e.getMessage().contains("Precision must be in range of 1 to 38"));
+
+ }
+
+ @Test
+ public void testFailedToCreateDecimalLogicalType() throws Exception {
+ // For decimal logical type, if avro validate schema failed, it will not create logical type,
+ // and the schema will be treated as its original type.
+
+ // java.lang.IllegalArgumentException: Invalid decimal scale: -1 (must be positive)
+ Schema schema1 = getSchema("logical/test_decimal_invalid2.avsc");
+ assertNull(schema1.getLogicalType());
+
+ // java.lang.IllegalArgumentException: Invalid decimal scale: 40 (greater than precision: 20)
+ Schema schema2 = getSchema("logical/test_decimal_invalid3.avsc");
+ assertNull(schema2.getLogicalType());
+
+ // java.lang.IllegalArgumentException: fixed(1) cannot store 30 digits (max 2)
+ Schema schema3 = getSchema("logical/test_decimal_invalid4.avsc");
+ assertNull(schema3.getLogicalType());
+ }
+
+}
diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroSkipFieldTest.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroSkipFieldTest.java
new file mode 100644
index 000000000..b946dbd86
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroSkipFieldTest.java
@@ -0,0 +1,626 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.junit.Test;
+
+public class AvroSkipFieldTest extends AvroTestBase {
+
+ @Test
+ public void testSkipUnionWithOneField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f0");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_union_before.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_union_one_field_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, "test" + i);
+ record.put(1, i % 2 == 0 ? "test" + i : null);
+ record.put(2, i % 2 == 0 ? "test" + i : i);
+ record.put(3, i);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(1));
+ expectedRecord.put(1, record.get(2));
+ expectedRecord.put(2, record.get(3));
+ expectedData.add(expectedRecord);
+ }
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipUnionWithNullableOneField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f1");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_union_before.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_union_nullable_field_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, "test" + i);
+ record.put(1, i % 2 == 0 ? "test" + i : null);
+ record.put(2, i % 2 == 0 ? "test" + i : i);
+ record.put(3, i);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(0));
+ expectedRecord.put(1, record.get(2));
+ expectedRecord.put(2, record.get(3));
+ expectedData.add(expectedRecord);
+ }
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipUnionWithMultiFields() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f2");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_union_before.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_union_multi_fields_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, "test" + i);
+ record.put(1, i % 2 == 0 ? "test" + i : null);
+ record.put(2, i % 2 == 0 ? "test" + i : i);
+ record.put(3, i);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(0));
+ expectedRecord.put(1, record.get(1));
+ expectedRecord.put(2, record.get(3));
+ expectedData.add(expectedRecord);
+ }
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipMapField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f1");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_map_before.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_map_expected.avsc");
+
+ HashMap map = new HashMap();
+ map.put("key1", "value1");
+ map.put("key2", "value3");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, "test" + i);
+ record.put(1, map);
+ record.put(2, i % 2 == 0);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(0));
+ expectedRecord.put(1, record.get(2));
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipArrayField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f1");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_array_before.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_array_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, "test" + i);
+ record.put(1, Arrays.asList("test" + i, "test" + i));
+ record.put(2, i % 2 == 0);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(0));
+ expectedRecord.put(1, record.get(2));
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipMultiFields() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f1");
+ skipFieldNames.add("f2");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("test_record.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_multi_fields_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, "test" + i);
+ record.put(1, i);
+ record.put(2, i % 2 == 0);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(0));
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipStringField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f2");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_base1.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_string_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ final byte[] testBytes = ("test" + i).getBytes();
+ GenericRecord record = new GenericData.Record(schema);
+ GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema());
+ fixed.bytes(testBytes);
+ record.put(0, fixed);
+ GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2);
+ record.put(1, symbol);
+ record.put(2, "testtest" + i);
+ record.put(3, ByteBuffer.wrap(testBytes));
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, testBytes);
+ expectedRecord.put(1, (byte) i % 2);
+ expectedRecord.put(2, testBytes);
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipBytesField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f3");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_base1.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_bytes_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ final byte[] testBytes = ("test" + i).getBytes();
+ GenericRecord record = new GenericData.Record(schema);
+ GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema());
+ fixed.bytes(testBytes);
+ record.put(0, fixed);
+ GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2);
+ record.put(1, symbol);
+ record.put(2, "testtest" + i);
+ record.put(3, ByteBuffer.wrap(testBytes));
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, testBytes);
+ expectedRecord.put(1, (byte) i % 2);
+ expectedRecord.put(2, record.get(2));
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipFixedField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f0");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_base1.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ final byte[] testBytes = ("test" + i).getBytes();
+ GenericRecord record = new GenericData.Record(schema);
+ GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema());
+ fixed.bytes(testBytes);
+ record.put(0, fixed);
+ GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2);
+ record.put(1, symbol);
+ record.put(2, "testtest" + i);
+ record.put(3, ByteBuffer.wrap(testBytes));
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, (byte) i % 2);
+ expectedRecord.put(1, record.get(2));
+ expectedRecord.put(2, record.get(3));
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipEnumField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f1");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_base1.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ final byte[] testBytes = ("test" + i).getBytes();
+ GenericRecord record = new GenericData.Record(schema);
+ GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema());
+ fixed.bytes(testBytes);
+ record.put(0, fixed);
+ GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2);
+ record.put(1, symbol);
+ record.put(2, "testtest" + i);
+ record.put(3, ByteBuffer.wrap(testBytes));
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, testBytes);
+ expectedRecord.put(1, record.get(2));
+ expectedRecord.put(2, record.get(3));
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipBooleanField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f0");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_base2.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_boolean_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0);
+ record.put(1, i);
+ record.put(2, (long) i);
+ record.put(3, (float) i);
+ record.put(4, (double) i);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(1));
+ expectedRecord.put(1, record.get(2));
+ expectedRecord.put(2, record.get(3));
+ expectedRecord.put(3, record.get(4));
+
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipIntField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f1");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_base2.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_int_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0);
+ record.put(1, i);
+ record.put(2, (long) i);
+ record.put(3, (float) i);
+ record.put(4, (double) i);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(0));
+ expectedRecord.put(1, record.get(2));
+ expectedRecord.put(2, record.get(3));
+ expectedRecord.put(3, record.get(4));
+
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipLongField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f2");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_base2.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_long_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0);
+ record.put(1, i);
+ record.put(2, (long) i);
+ record.put(3, (float) i);
+ record.put(4, (double) i);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(0));
+ expectedRecord.put(1, record.get(1));
+ expectedRecord.put(2, record.get(3));
+ expectedRecord.put(3, record.get(4));
+
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipFloatField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f3");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_base2.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_float_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0);
+ record.put(1, i);
+ record.put(2, (long) i);
+ record.put(3, (float) i);
+ record.put(4, (double) i);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(0));
+ expectedRecord.put(1, record.get(1));
+ expectedRecord.put(2, record.get(2));
+ expectedRecord.put(3, record.get(4));
+
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipDoubleField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f4");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_base2.avsc");
+ Schema expectedSchema = getSchema("skip/test_skip_double_expected.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0);
+ record.put(1, i);
+ record.put(2, (long) i);
+ record.put(3, (float) i);
+ record.put(4, (double) i);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, record.get(0));
+ expectedRecord.put(1, record.get(1));
+ expectedRecord.put(2, record.get(2));
+ expectedRecord.put(3, record.get(3));
+
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipRecordField() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f0");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("skip/test_skip_record_before.avsc");
+ Schema nestedSchema = schema.getFields().get(0).schema();
+ ArrayList<GenericRecord> data = new ArrayList<>();
+
+ Schema expectedSchema = getSchema("skip/test_skip_record_expected.avsc");
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ GenericRecord nestedRecord = new GenericData.Record(nestedSchema);
+ nestedRecord.put(0, "test" + i);
+ nestedRecord.put(1, i);
+ record.put(0, nestedRecord);
+ record.put(1, i);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ expectedRecord.put(0, i);
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipNestedFields() throws Exception {
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f0.f0");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ Schema schema = getSchema("test_nested_record.avsc");
+ Schema nestedSchema = schema.getFields().get(0).schema();
+ ArrayList<GenericRecord> data = new ArrayList<>();
+
+ Schema expectedSchema = getSchema("skip/test_skip_second_level_expected.avsc");
+ Schema expectedNestedSchema = expectedSchema.getFields().get(0).schema();
+ ArrayList<GenericRecord> expectedData = new ArrayList<>();
+
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ GenericRecord nestedRecord = new GenericData.Record(nestedSchema);
+ nestedRecord.put(0, "test" + i);
+ nestedRecord.put(1, i);
+ record.put(0, nestedRecord);
+ data.add(record);
+
+ GenericRecord expectedRecord = new GenericData.Record(expectedSchema);
+ GenericRecord expectedNestedRecord = new GenericData.Record(expectedNestedSchema);
+ expectedNestedRecord.put(0, nestedRecord.get(1));
+ expectedRecord.put(0, expectedNestedRecord);
+ expectedData.add(expectedRecord);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkNestedRecordResult(expectedSchema, expectedData, root);
+ }
+
+ @Test
+ public void testSkipThirdLevelField() throws Exception {
+ Schema firstLevelSchema = getSchema("skip/test_skip_third_level_expected.avsc");
+ Schema secondLevelSchema = firstLevelSchema.getFields().get(0).schema();
+ Schema thirdLevelSchema = secondLevelSchema.getFields().get(0).schema();
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord firstLevelRecord = new GenericData.Record(firstLevelSchema);
+ GenericRecord secondLevelRecord = new GenericData.Record(secondLevelSchema);
+ GenericRecord thirdLevelRecord = new GenericData.Record(thirdLevelSchema);
+
+ thirdLevelRecord.put(0, i);
+ thirdLevelRecord.put(1, "test" + i);
+ thirdLevelRecord.put(2, i % 2 == 0);
+
+ secondLevelRecord.put(0, thirdLevelRecord);
+ firstLevelRecord.put(0, secondLevelRecord);
+ data.add(firstLevelRecord);
+ }
+
+ // do not skip any fields first
+ VectorSchemaRoot root1 = writeAndRead(firstLevelSchema, data);
+
+ assertEquals(1, root1.getFieldVectors().size());
+ assertEquals(Types.MinorType.STRUCT, root1.getFieldVectors().get(0).getMinorType());
+ StructVector secondLevelVector = (StructVector) root1.getFieldVectors().get(0);
+ assertEquals(1, secondLevelVector.getChildrenFromFields().size());
+ assertEquals(Types.MinorType.STRUCT, secondLevelVector.getChildrenFromFields().get(0).getMinorType());
+ StructVector thirdLevelVector = (StructVector) secondLevelVector.getChildrenFromFields().get(0);
+ assertEquals(3, thirdLevelVector.getChildrenFromFields().size());
+
+ // skip third level field and validate
+ Set<String> skipFieldNames = new HashSet<>();
+ skipFieldNames.add("f0.f0.f0");
+ config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build();
+ VectorSchemaRoot root2 = writeAndRead(firstLevelSchema, data);
+
+ assertEquals(1, root2.getFieldVectors().size());
+ assertEquals(Types.MinorType.STRUCT, root2.getFieldVectors().get(0).getMinorType());
+ StructVector secondStruct = (StructVector) root2.getFieldVectors().get(0);
+ assertEquals(1, secondStruct.getChildrenFromFields().size());
+ assertEquals(Types.MinorType.STRUCT, secondStruct.getChildrenFromFields().get(0).getMinorType());
+ StructVector thirdStruct = (StructVector) secondStruct.getChildrenFromFields().get(0);
+ assertEquals(2, thirdStruct.getChildrenFromFields().size());
+
+ assertEquals(Types.MinorType.INT, thirdStruct.getChildrenFromFields().get(0).getMinorType());
+ assertEquals(Types.MinorType.BIT, thirdStruct.getChildrenFromFields().get(1).getMinorType());
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java
new file mode 100644
index 000000000..a00cd7704
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.util.Text;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.EncoderFactory;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.rules.TemporaryFolder;
+
+public class AvroTestBase {
+
+ @ClassRule
+ public static final TemporaryFolder TMP = new TemporaryFolder();
+
+ protected AvroToArrowConfig config;
+
+ @Before
+ public void init() {
+ BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ config = new AvroToArrowConfigBuilder(allocator).build();
+ }
+
+ protected Schema getSchema(String schemaName) throws Exception {
+ Path schemaPath = Paths.get(TestWriteReadAvroRecord.class.getResource("/").getPath(),
+ "schema", schemaName);
+ return new Schema.Parser().parse(schemaPath.toFile());
+ }
+
+ protected VectorSchemaRoot writeAndRead(Schema schema, List data) throws Exception {
+ File dataFile = TMP.newFile();
+
+ BinaryEncoder
+ encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null);
+ DatumWriter writer = new GenericDatumWriter(schema);
+ BinaryDecoder
+ decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null);
+
+ for (Object value : data) {
+ writer.write(value, encoder);
+ }
+
+ return AvroToArrow.avroToArrow(schema, decoder, config);
+ }
+
+ protected void checkArrayResult(List<List<?>> expected, ListVector vector) {
+ assertEquals(expected.size(), vector.getValueCount());
+ for (int i = 0; i < expected.size(); i++) {
+ checkArrayElement(expected.get(i), vector.getObject(i));
+ }
+ }
+
+ protected void checkArrayElement(List expected, List actual) {
+ assertEquals(expected.size(), actual.size());
+ for (int i = 0; i < expected.size(); i++) {
+ Object value1 = expected.get(i);
+ Object value2 = actual.get(i);
+ if (value1 == null) {
+ assertTrue(value2 == null);
+ continue;
+ }
+ if (value2 instanceof byte[]) {
+ value2 = ByteBuffer.wrap((byte[]) value2);
+ } else if (value2 instanceof Text) {
+ value2 = value2.toString();
+ }
+ assertEquals(value1, value2);
+ }
+ }
+
+ protected void checkPrimitiveResult(List data, FieldVector vector) {
+ assertEquals(data.size(), vector.getValueCount());
+ for (int i = 0; i < data.size(); i++) {
+ Object value1 = data.get(i);
+ Object value2 = vector.getObject(i);
+ if (value1 == null) {
+ assertTrue(value2 == null);
+ continue;
+ }
+ if (value2 instanceof byte[]) {
+ value2 = ByteBuffer.wrap((byte[]) value2);
+ if (value1 instanceof byte[]) {
+ value1 = ByteBuffer.wrap((byte[]) value1);
+ }
+ } else if (value2 instanceof Text) {
+ value2 = value2.toString();
+ } else if (value2 instanceof Byte) {
+ value2 = ((Byte) value2).intValue();
+ }
+ assertEquals(value1, value2);
+ }
+ }
+
+ protected void checkRecordResult(Schema schema, ArrayList<GenericRecord> data, VectorSchemaRoot root) {
+ assertEquals(data.size(), root.getRowCount());
+ assertEquals(schema.getFields().size(), root.getFieldVectors().size());
+
+ for (int i = 0; i < schema.getFields().size(); i++) {
+ ArrayList fieldData = new ArrayList();
+ for (GenericRecord record : data) {
+ fieldData.add(record.get(i));
+ }
+
+ checkPrimitiveResult(fieldData, root.getFieldVectors().get(i));
+ }
+
+ }
+
+ protected void checkNestedRecordResult(Schema schema, List<GenericRecord> data, VectorSchemaRoot root) {
+ assertEquals(data.size(), root.getRowCount());
+ assertTrue(schema.getFields().size() == 1);
+
+ final Schema nestedSchema = schema.getFields().get(0).schema();
+ final StructVector structVector = (StructVector) root.getFieldVectors().get(0);
+
+ for (int i = 0; i < nestedSchema.getFields().size(); i++) {
+ ArrayList fieldData = new ArrayList();
+ for (GenericRecord record : data) {
+ GenericRecord nestedRecord = (GenericRecord) record.get(0);
+ fieldData.add(nestedRecord.get(i));
+ }
+
+ checkPrimitiveResult(fieldData, structVector.getChildrenFromFields().get(i));
+ }
+
+ }
+
+
+ // belows are for iterator api
+
+ protected void checkArrayResult(List<List<?>> expected, List<ListVector> vectors) {
+ int valueCount = vectors.stream().mapToInt(v -> v.getValueCount()).sum();
+ assertEquals(expected.size(), valueCount);
+
+ int index = 0;
+ for (ListVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ checkArrayElement(expected.get(index++), vector.getObject(i));
+ }
+ }
+ }
+
+ protected void checkRecordResult(Schema schema, ArrayList<GenericRecord> data, List<VectorSchemaRoot> roots) {
+ roots.forEach(root -> {
+ assertEquals(schema.getFields().size(), root.getFieldVectors().size());
+ });
+
+ for (int i = 0; i < schema.getFields().size(); i++) {
+ List fieldData = new ArrayList();
+ List<FieldVector> vectors = new ArrayList<>();
+ for (GenericRecord record : data) {
+ fieldData.add(record.get(i));
+ }
+ final int columnIndex = i;
+ roots.forEach(root -> vectors.add(root.getFieldVectors().get(columnIndex)));
+
+ checkPrimitiveResult(fieldData, vectors);
+ }
+
+ }
+
+ protected void checkPrimitiveResult(List data, List<FieldVector> vectors) {
+ int valueCount = vectors.stream().mapToInt(v -> v.getValueCount()).sum();
+ assertEquals(data.size(), valueCount);
+
+ int index = 0;
+ for (FieldVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ Object value1 = data.get(index++);
+ Object value2 = vector.getObject(i);
+ if (value1 == null) {
+ assertNull(value2);
+ continue;
+ }
+ if (value2 instanceof byte[]) {
+ value2 = ByteBuffer.wrap((byte[]) value2);
+ if (value1 instanceof byte[]) {
+ value1 = ByteBuffer.wrap((byte[]) value1);
+ }
+ } else if (value2 instanceof Text) {
+ value2 = value2.toString();
+ }
+ assertEquals(value1, value2);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowIteratorTest.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowIteratorTest.java
new file mode 100644
index 000000000..2b05a19f3
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowIteratorTest.java
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.io.Decoder;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.util.Utf8;
+import org.junit.Test;
+
+public class AvroToArrowIteratorTest extends AvroTestBase {
+
+ @Override
+ public void init() {
+ final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ this.config = new AvroToArrowConfigBuilder(allocator).setTargetBatchSize(3).build();
+ }
+
+ private AvroToArrowVectorIterator convert(Schema schema, List data) throws Exception {
+ File dataFile = TMP.newFile();
+
+ BinaryEncoder
+ encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null);
+ DatumWriter writer = new GenericDatumWriter(schema);
+ BinaryDecoder
+ decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null);
+
+ for (Object value : data) {
+ writer.write(value, encoder);
+ }
+
+ return AvroToArrow.avroToArrowIterator(schema, decoder, config);
+ }
+
+ @Test
+ public void testStringType() throws Exception {
+ Schema schema = getSchema("test_primitive_string.avsc");
+ List<String> data = Arrays.asList("v1", "v2", "v3", "v4", "v5");
+
+ List<VectorSchemaRoot> roots = new ArrayList<>();
+ List<FieldVector> vectors = new ArrayList<>();
+ try (AvroToArrowVectorIterator iterator = convert(schema, data)) {
+ while (iterator.hasNext()) {
+ VectorSchemaRoot root = iterator.next();
+ FieldVector vector = root.getFieldVectors().get(0);
+ roots.add(root);
+ vectors.add(vector);
+ }
+ }
+ checkPrimitiveResult(data, vectors);
+ AutoCloseables.close(roots);
+ }
+
+ @Test
+ public void testNullableStringType() throws Exception {
+ Schema schema = getSchema("test_nullable_string.avsc");
+
+ List<GenericRecord> data = new ArrayList<>();
+ List<String> expected = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ String value = i % 2 == 0 ? "test" + i : null;
+ record.put(0, value);
+ expected.add(value);
+ data.add(record);
+ }
+
+ List<VectorSchemaRoot> roots = new ArrayList<>();
+ List<FieldVector> vectors = new ArrayList<>();
+ try (AvroToArrowVectorIterator iterator = convert(schema, data);) {
+ while (iterator.hasNext()) {
+ VectorSchemaRoot root = iterator.next();
+ FieldVector vector = root.getFieldVectors().get(0);
+ roots.add(root);
+ vectors.add(vector);
+ }
+ }
+ checkPrimitiveResult(expected, vectors);
+ AutoCloseables.close(roots);
+
+ }
+
+ @Test
+ public void testRecordType() throws Exception {
+ Schema schema = getSchema("test_record.avsc");
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, "test" + i);
+ record.put(1, i);
+ record.put(2, i % 2 == 0);
+ data.add(record);
+ }
+
+ List<VectorSchemaRoot> roots = new ArrayList<>();
+ try (AvroToArrowVectorIterator iterator = convert(schema, data)) {
+ while (iterator.hasNext()) {
+ roots.add(iterator.next());
+ }
+ }
+ checkRecordResult(schema, data, roots);
+ AutoCloseables.close(roots);
+
+ }
+
+ @Test
+ public void testArrayType() throws Exception {
+ Schema schema = getSchema("test_array.avsc");
+ List<List<?>> data = Arrays.asList(
+ Arrays.asList("11", "222", "999"),
+ Arrays.asList("12222", "2333", "1000"),
+ Arrays.asList("1rrr", "2ggg"),
+ Arrays.asList("1vvv", "2bbb"),
+ Arrays.asList("1fff", "2"));
+
+ List<VectorSchemaRoot> roots = new ArrayList<>();
+ List<ListVector> vectors = new ArrayList<>();
+ try (AvroToArrowVectorIterator iterator = convert(schema, data)) {
+ while (iterator.hasNext()) {
+ VectorSchemaRoot root = iterator.next();
+ roots.add(root);
+ vectors.add((ListVector) root.getFieldVectors().get(0));
+ }
+ }
+ checkArrayResult(data, vectors);
+ AutoCloseables.close(roots);
+ }
+
+ @Test
+ public void runLargeNumberOfRows() throws Exception {
+ Schema schema = getSchema("test_large_data.avsc");
+ int x = 0;
+ final int targetRows = 600000;
+ Decoder fakeDecoder = new FakeDecoder(targetRows);
+ try (AvroToArrowVectorIterator iter = AvroToArrow.avroToArrowIterator(schema, fakeDecoder,
+ new AvroToArrowConfigBuilder(config.getAllocator()).build())) {
+ while (iter.hasNext()) {
+ VectorSchemaRoot root = iter.next();
+ x += root.getRowCount();
+ root.close();
+ }
+ }
+
+ assertEquals(x, targetRows);
+ }
+
+ /**
+ * Fake avro decoder to test large data.
+ */
+ private class FakeDecoder extends Decoder {
+
+ private int numRows;
+
+ FakeDecoder(int numRows) {
+ this.numRows = numRows;
+ }
+
+ // note that Decoder has no hasNext() API, assume enum is the first type in schema
+ // and fixed is the last type in schema and they are unique.
+ private void validate() throws EOFException {
+ if (numRows <= 0) {
+ throw new EOFException();
+ }
+ }
+
+ @Override
+ public void readNull() throws IOException {
+ }
+
+ @Override
+ public boolean readBoolean() throws IOException {
+ return false;
+ }
+
+ @Override
+ public int readInt() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public long readLong() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public float readFloat() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public double readDouble() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public Utf8 readString(Utf8 old) throws IOException {
+ return new Utf8("test123test123" + numRows);
+ }
+
+ @Override
+ public String readString() throws IOException {
+ return "test123test123" + numRows;
+ }
+
+ @Override
+ public void skipString() throws IOException {
+
+ }
+
+ @Override
+ public ByteBuffer readBytes(ByteBuffer old) throws IOException {
+ return ByteBuffer.allocate(0);
+ }
+
+ @Override
+ public void skipBytes() throws IOException {
+
+ }
+
+ @Override
+ public void readFixed(byte[] bytes, int start, int length) throws IOException {
+ // fixed type is last column, after read value, decrease numRows
+ numRows--;
+ }
+
+ @Override
+ public void skipFixed(int length) throws IOException {
+
+ }
+
+ @Override
+ public int readEnum() throws IOException {
+ // enum type is first column, validate numRows first.
+ validate();
+ return 0;
+ }
+
+ @Override
+ public long readArrayStart() throws IOException {
+ return 5;
+ }
+
+ @Override
+ public long arrayNext() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public long skipArray() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public long readMapStart() throws IOException {
+ return 5;
+ }
+
+ @Override
+ public long mapNext() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public long skipMap() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public int readIndex() throws IOException {
+ return 0;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java
new file mode 100644
index 000000000..c007e1ac7
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java
@@ -0,0 +1,477 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.junit.Test;
+
+public class AvroToArrowTest extends AvroTestBase {
+
+ @Test
+ public void testStringType() throws Exception {
+ Schema schema = getSchema("test_primitive_string.avsc");
+ List<String> data = Arrays.asList("v1", "v2", "v3", "v4", "v5");
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(data, vector);
+ }
+
+ @Test
+ public void testNullableStringType() throws Exception {
+ Schema schema = getSchema("test_nullable_string.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0 ? "test" + i : null);
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(schema, data, root);
+ }
+
+ @Test
+ public void testRecordType() throws Exception {
+ Schema schema = getSchema("test_record.avsc");
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, "test" + i);
+ record.put(1, i);
+ record.put(2, i % 2 == 0);
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(schema, data, root);
+ }
+
+ @Test
+ public void testFixedAttributes() throws Exception {
+ Schema schema = getSchema("attrs/test_fixed_attr.avsc");
+
+ List<GenericData.Fixed> data = new ArrayList<>();
+ List<byte[]> expected = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ byte[] value = ("value" + i).getBytes(StandardCharsets.UTF_8);
+ expected.add(value);
+ GenericData.Fixed fixed = new GenericData.Fixed(schema);
+ fixed.bytes(value);
+ data.add(fixed);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ Map<String, String> metadata = vector.getField().getMetadata();
+ assertEquals("fixed doc", metadata.get("doc"));
+ assertEquals("[\"alias1\",\"alias2\"]", metadata.get("aliases"));
+ }
+
+ @Test
+ public void testEnumAttributes() throws Exception {
+ Schema schema = getSchema("attrs/test_enum_attrs.avsc");
+ List<GenericData.EnumSymbol> data = Arrays.asList(
+ new GenericData.EnumSymbol(schema, "SPADES"),
+ new GenericData.EnumSymbol(schema, "HEARTS"),
+ new GenericData.EnumSymbol(schema, "DIAMONDS"),
+ new GenericData.EnumSymbol(schema, "CLUBS"),
+ new GenericData.EnumSymbol(schema, "SPADES"));
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ Map<String, String> metadata = vector.getField().getMetadata();
+ assertEquals("enum doc", metadata.get("doc"));
+ assertEquals("[\"alias1\",\"alias2\"]", metadata.get("aliases"));
+ }
+
+ @Test
+ public void testRecordAttributes() throws Exception {
+ Schema schema = getSchema("attrs/test_record_attrs.avsc");
+ Schema nestedSchema = schema.getFields().get(0).schema();
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ GenericRecord nestedRecord = new GenericData.Record(nestedSchema);
+ nestedRecord.put(0, "test" + i);
+ nestedRecord.put(1, i);
+ record.put(0, nestedRecord);
+
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+
+ StructVector structVector = (StructVector) root.getFieldVectors().get(0);
+ Map<String, String> structMeta = structVector.getField().getMetadata();
+ Map<String, String> childMeta1 = structVector.getChildByOrdinal(0).getField().getMetadata();
+ Map<String, String> childMeta2 = structVector.getChildByOrdinal(1).getField().getMetadata();
+
+ assertEquals("f0 doc", structMeta.get("doc"));
+ assertEquals("[\"f0.a1\"]", structMeta.get("aliases"));
+ assertEquals("f1 doc", childMeta1.get("doc"));
+ assertEquals("[\"f1.a1\",\"f1.a2\"]", childMeta1.get("aliases"));
+ assertEquals("f2 doc", childMeta2.get("doc"));
+ assertEquals("[\"f2.a1\",\"f2.a2\"]", childMeta2.get("aliases"));
+ }
+
+ @Test
+ public void testNestedRecordType() throws Exception {
+ Schema schema = getSchema("test_nested_record.avsc");
+ Schema nestedSchema = schema.getFields().get(0).schema();
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ GenericRecord nestedRecord = new GenericData.Record(nestedSchema);
+ nestedRecord.put(0, "test" + i);
+ nestedRecord.put(1, i);
+ record.put(0, nestedRecord);
+
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkNestedRecordResult(schema, data, root);
+ }
+
+ @Test
+ public void testEnumType() throws Exception {
+ Schema schema = getSchema("test_primitive_enum.avsc");
+ List<GenericData.EnumSymbol> data = Arrays.asList(
+ new GenericData.EnumSymbol(schema, "SPADES"),
+ new GenericData.EnumSymbol(schema, "HEARTS"),
+ new GenericData.EnumSymbol(schema, "DIAMONDS"),
+ new GenericData.EnumSymbol(schema, "CLUBS"),
+ new GenericData.EnumSymbol(schema, "SPADES"));
+
+ List<Integer> expectedIndices = Arrays.asList(0, 1, 2, 3, 0);
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(expectedIndices, vector);
+
+ VarCharVector dictVector = (VarCharVector) config.getProvider().lookup(0).getVector();
+ assertEquals(4, dictVector.getValueCount());
+
+ assertEquals("SPADES", dictVector.getObject(0).toString());
+ assertEquals("HEARTS", dictVector.getObject(1).toString());
+ assertEquals("DIAMONDS", dictVector.getObject(2).toString());
+ assertEquals("CLUBS", dictVector.getObject(3).toString());
+ }
+
+ @Test
+ public void testIntType() throws Exception {
+ Schema schema = getSchema("test_primitive_int.avsc");
+ List<Integer> data = Arrays.asList(1, 2, 3, 4, 5);
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(data, vector);
+ }
+
+ @Test
+ public void testNullableIntType() throws Exception {
+ Schema schema = getSchema("test_nullable_int.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0 ? i : null);
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(schema, data, root);
+ }
+
+ @Test
+ public void testLongType() throws Exception {
+ Schema schema = getSchema("test_primitive_long.avsc");
+ List<Long> data = Arrays.asList(1L, 2L, 3L, 4L, 5L);
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(data, vector);
+ }
+
+ @Test
+ public void testNullableLongType() throws Exception {
+ Schema schema = getSchema("test_nullable_long.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0 ? (long) i : null);
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(schema, data, root);
+ }
+
+ @Test
+ public void testFloatType() throws Exception {
+ Schema schema = getSchema("test_primitive_float.avsc");
+ List<Float> data = Arrays.asList(1.1f, 2.2f, 3.3f, 4.4f, 5.5f);
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(data, vector);
+ }
+
+ @Test
+ public void testNullableFloatType() throws Exception {
+ Schema schema = getSchema("test_nullable_float.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0 ? i + 0.1f : null);
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(schema, data, root);
+ }
+
+ @Test
+ public void testDoubleType() throws Exception {
+ Schema schema = getSchema("test_primitive_double.avsc");
+ List<Double> data = Arrays.asList(1.1, 2.2, 3.3, 4.4, 5.5);
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(data, vector);
+ }
+
+ @Test
+ public void testNullableDoubleType() throws Exception {
+ Schema schema = getSchema("test_nullable_double.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0 ? i + 0.1 : null);
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(schema, data, root);
+ }
+
+ @Test
+ public void testBytesType() throws Exception {
+ Schema schema = getSchema("test_primitive_bytes.avsc");
+ List<ByteBuffer> data = Arrays.asList(
+ ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)),
+ ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)),
+ ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)),
+ ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)),
+ ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8)));
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(data, vector);
+ }
+
+ @Test
+ public void testNullableBytesType() throws Exception {
+ Schema schema = getSchema("test_nullable_bytes.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0 ? ByteBuffer.wrap(("test" + i).getBytes(StandardCharsets.UTF_8)) : null);
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(schema, data, root);
+ }
+
+ @Test
+ public void testBooleanType() throws Exception {
+ Schema schema = getSchema("test_primitive_boolean.avsc");
+ List<Boolean> data = Arrays.asList(true, false, true, false, true);
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(data, vector);
+ }
+
+ @Test
+ public void testNullableBooleanType() throws Exception {
+ Schema schema = getSchema("test_nullable_boolean.avsc");
+
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0 ? true : null);
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ checkRecordResult(schema, data, root);
+ }
+
+ @Test
+ public void testArrayType() throws Exception {
+ Schema schema = getSchema("test_array.avsc");
+ List<List<?>> data = Arrays.asList(
+ Arrays.asList("11", "222", "999"),
+ Arrays.asList("12222", "2333", "1000"),
+ Arrays.asList("1rrr", "2ggg"),
+ Arrays.asList("1vvv", "2bbb"),
+ Arrays.asList("1fff", "2"));
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkArrayResult(data, (ListVector) vector);
+ }
+
+ @Test
+ public void testMapType() throws Exception {
+ Schema schema = getSchema("test_map.avsc");
+
+ List keys = Arrays.asList("key1", "key2", "key3", "key4", "key5", "key6");
+ List vals = Arrays.asList("val1", "val2", "val3", "val4", "val5", "val6");
+
+ List<LinkedHashMap> data = new ArrayList<>();
+ LinkedHashMap map1 = new LinkedHashMap();
+ map1.put(keys.get(0), vals.get(0));
+ map1.put(keys.get(1), vals.get(1));
+ data.add(map1);
+
+ LinkedHashMap map2 = new LinkedHashMap();
+ map2.put(keys.get(2), vals.get(2));
+ map2.put(keys.get(3), vals.get(3));
+ data.add(map2);
+
+ LinkedHashMap map3 = new LinkedHashMap();
+ map3.put(keys.get(4), vals.get(4));
+ map3.put(keys.get(5), vals.get(5));
+ data.add(map3);
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ MapVector vector = (MapVector) root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(keys, vector.getDataVector().getChildrenFromFields().get(0));
+ checkPrimitiveResult(vals, vector.getDataVector().getChildrenFromFields().get(1));
+ assertEquals(0, vector.getOffsetBuffer().getInt(0));
+ assertEquals(2, vector.getOffsetBuffer().getInt(1 * 4));
+ assertEquals(4, vector.getOffsetBuffer().getInt(2 * 4));
+ assertEquals(6, vector.getOffsetBuffer().getInt(3 * 4));
+ }
+
+ @Test
+ public void testFixedType() throws Exception {
+ Schema schema = getSchema("test_fixed.avsc");
+
+ List<GenericData.Fixed> data = new ArrayList<>();
+ List<byte[]> expected = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ byte[] value = ("value" + i).getBytes(StandardCharsets.UTF_8);
+ expected.add(value);
+ GenericData.Fixed fixed = new GenericData.Fixed(schema);
+ fixed.bytes(value);
+ data.add(fixed);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(expected, vector);
+ }
+
+ @Test
+ public void testUnionType() throws Exception {
+ Schema schema = getSchema("test_union.avsc");
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<Object> expected = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, i % 2 == 0 ? "test" + i : i);
+ expected.add(i % 2 == 0 ? "test" + i : i);
+ data.add(record);
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(expected, vector);
+ }
+
+ @Test
+ public void testNullableUnionType() throws Exception {
+ Schema schema = getSchema("test_nullable_union.avsc");
+ ArrayList<GenericRecord> data = new ArrayList<>();
+ ArrayList<Object> expected = new ArrayList<>();
+ for (int i = 0; i < 5; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ if (i % 3 == 0) {
+ record.put(0, "test" + i);
+ expected.add("test" + i);
+ data.add(record);
+ } else if (i % 3 == 1) {
+ record.put(0, i);
+ expected.add(i);
+ data.add(record);
+ } else {
+ record.put(0, null);
+ expected.add(null);
+ data.add(record);
+ }
+ }
+
+ VectorSchemaRoot root = writeAndRead(schema, data);
+ FieldVector vector = root.getFieldVectors().get(0);
+
+ checkPrimitiveResult(expected, vector);
+ }
+
+}
diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java
new file mode 100644
index 000000000..bf695d193
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DatumWriter;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+
+public class TestWriteReadAvroRecord {
+
+ @ClassRule
+ public static final TemporaryFolder TMP = new TemporaryFolder();
+
+ @Test
+ public void testWriteAndRead() throws Exception {
+
+ File dataFile = TMP.newFile();
+ Path schemaPath = Paths.get(TestWriteReadAvroRecord.class.getResource("/").getPath(), "schema", "test.avsc");
+ Schema schema = new Schema.Parser().parse(schemaPath.toFile());
+
+ //write data to disk
+ GenericRecord user1 = new GenericData.Record(schema);
+ user1.put("name", "Alyssa");
+ user1.put("favorite_number", 256);
+
+ GenericRecord user2 = new GenericData.Record(schema);
+ user2.put("name", "Ben");
+ user2.put("favorite_number", 7);
+ user2.put("favorite_color", "red");
+
+ DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
+ DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
+ dataFileWriter.create(schema, dataFile);
+ dataFileWriter.append(user1);
+ dataFileWriter.append(user2);
+ dataFileWriter.close();
+
+ //read data from disk
+ DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
+ DataFileReader<GenericRecord>
+ dataFileReader = new DataFileReader<GenericRecord>(dataFile, datumReader);
+ List<GenericRecord> result = new ArrayList<>();
+ while (dataFileReader.hasNext()) {
+ GenericRecord user = dataFileReader.next();
+ result.add(user);
+ }
+
+ assertEquals(2, result.size());
+ GenericRecord deUser1 = result.get(0);
+ assertEquals("Alyssa", deUser1.get("name").toString());
+ assertEquals(256, deUser1.get("favorite_number"));
+ assertEquals(null, deUser1.get("favorite_color"));
+
+ GenericRecord deUser2 = result.get(1);
+ assertEquals("Ben", deUser2.get("name").toString());
+ assertEquals(7, deUser2.get("favorite_number"));
+ assertEquals("red", deUser2.get("favorite_color").toString());
+ }
+
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc
new file mode 100644
index 000000000..afd00b8d9
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "type": "enum",
+ "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"],
+ "name": "testEnum",
+ "doc" : "enum doc",
+ "aliases" : ["alias1", "alias2"]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc
new file mode 100644
index 000000000..55e504def
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "type": "fixed",
+ "size": 6,
+ "name": "testFixed",
+ "doc" : "fixed doc",
+ "aliases" : ["alias1", "alias2"]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc
new file mode 100644
index 000000000..2e2e311a9
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testAttrs",
+ "fields": [
+ {
+ "name" : "f0",
+ "type" : {
+ "type" : "record",
+ "name" : "nestedInRecord",
+ "doc" : "f0 doc",
+ "aliases" : ["f0.a1"],
+ "fields": [
+ {"name": "f1", "type": "string", "doc": "f1 doc", "aliases" : ["f1.a1", "f1.a2"]},
+ {"name": "f2", "type": "int", "doc": "f2 doc", "aliases" : ["f2.a1", "f2.a2"]}
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc
new file mode 100644
index 000000000..f661e6506
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "int",
+ "logicalType" : "date"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc
new file mode 100644
index 000000000..18d7d63fc
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "bytes",
+ "logicalType" : "decimal",
+ "precision": 39,
+ "scale": 2
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc
new file mode 100644
index 000000000..eed7bd781
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "bytes",
+ "logicalType" : "decimal",
+ "precision": 20,
+ "scale": -1
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc
new file mode 100644
index 000000000..1667b8aff
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "bytes",
+ "logicalType" : "decimal",
+ "precision": 20,
+ "scale": 40
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc
new file mode 100644
index 000000000..e1f710416
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "fixed",
+ "size" : 1,
+ "logicalType" : "decimal",
+ "precision": 30,
+ "scale": 2
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc
new file mode 100644
index 000000000..944b5d85d
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "bytes",
+ "logicalType" : "decimal",
+ "precision": 10,
+ "scale": 2
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc
new file mode 100644
index 000000000..1901f90a9
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "fixed",
+ "size" : 10,
+ "logicalType" : "decimal",
+ "precision": 10,
+ "scale": 2
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc
new file mode 100644
index 000000000..ee7d4e937
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "long",
+ "logicalType" : "time-micros"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc
new file mode 100644
index 000000000..54877babc
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "int",
+ "logicalType" : "time-millis"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc
new file mode 100644
index 000000000..15c0bf53d
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "long",
+ "logicalType" : "timestamp-micros"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc
new file mode 100644
index 000000000..822a2c360
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "name": "test",
+ "type": "long",
+ "logicalType" : "timestamp-millis"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc
new file mode 100644
index 000000000..e836aa768
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "f0", "type": "string"},
+ {"name": "f1", "type": {"type" : "array", "items": "string"}},
+ {"name": "f2", "type": "boolean"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc
new file mode 100644
index 000000000..36e7fdfb0
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "f0", "type": "string"},
+ {"name": "f2", "type": "boolean"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc
new file mode 100644
index 000000000..5338253f4
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}},
+ {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}},
+ {"name": "f2", "type": "string"},
+ {"name": "f3", "type": "bytes"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc
new file mode 100644
index 000000000..50655a70e
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": "boolean"},
+ {"name": "f1", "type": "int"},
+ {"name": "f2", "type": "long"},
+ {"name": "f3", "type": "float"},
+ {"name": "f4", "type": "double"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc
new file mode 100644
index 000000000..9b62e3149
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f1", "type": "int"},
+ {"name": "f2", "type": "long"},
+ {"name": "f3", "type": "float"},
+ {"name": "f4", "type": "double"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc
new file mode 100644
index 000000000..8a1903b34
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}},
+ {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}},
+ {"name": "f2", "type": "string"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc
new file mode 100644
index 000000000..6021c4454
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": "boolean"},
+ {"name": "f1", "type": "int"},
+ {"name": "f2", "type": "long"},
+ {"name": "f3", "type": "float"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc
new file mode 100644
index 000000000..f5ed86a28
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}},
+ {"name": "f2", "type": "string"},
+ {"name": "f3", "type": "bytes"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc
new file mode 100644
index 000000000..5423a7977
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}},
+ {"name": "f2", "type": "string"},
+ {"name": "f3", "type": "bytes"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc
new file mode 100644
index 000000000..dea106331
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": "boolean"},
+ {"name": "f1", "type": "int"},
+ {"name": "f2", "type": "long"},
+ {"name": "f4", "type": "double"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc
new file mode 100644
index 000000000..53d4f1025
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": "boolean"},
+ {"name": "f2", "type": "long"},
+ {"name": "f3", "type": "float"},
+ {"name": "f4", "type": "double"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc
new file mode 100644
index 000000000..bf16601dd
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": "boolean"},
+ {"name": "f1", "type": "int"},
+ {"name": "f3", "type": "float"},
+ {"name": "f4", "type": "double"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc
new file mode 100644
index 000000000..8cbb1a1d7
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "f0", "type": "string"},
+ {"name": "f1", "type": {"type" : "map", "values": "string"}},
+ {"name": "f2", "type": "boolean"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc
new file mode 100644
index 000000000..36e7fdfb0
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "f0", "type": "string"},
+ {"name": "f2", "type": "boolean"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc
new file mode 100644
index 000000000..b5d637b1d
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testSkip",
+ "fields": [
+ {"name": "f0", "type": "string"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc
new file mode 100644
index 000000000..7aee92b92
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {
+ "name" : "f0",
+ "type" : {
+ "type" : "record",
+ "name" : "nestedInRecord",
+ "fields": [
+ {"name": "f00", "type": "string"},
+ {"name": "f01", "type": "int"}
+ ]
+ }
+ },
+ {
+ "name" : "f1", "type" : "int"
+ }
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc
new file mode 100644
index 000000000..3e2495203
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ { "name" : "f1", "type" : "int"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc
new file mode 100644
index 000000000..f3b7f8c09
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testSkipNested",
+ "fields": [
+ {
+ "name" : "nested",
+ "type" : {
+ "type" : "record",
+ "name" : "nestedInRecord",
+ "fields": [
+ {"name": "f1", "type": "int"}
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc
new file mode 100644
index 000000000..553525847
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testSkip",
+ "fields": [
+ {"name": "f0", "type": "string"},
+ {"name": "f2", "type": "boolean"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc
new file mode 100644
index 000000000..2d2c08174
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}},
+ {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}},
+ {"name": "f3", "type": "bytes"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc
new file mode 100644
index 000000000..6f42da893
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "firstLevel",
+ "fields": [
+ {
+ "name" : "f0",
+ "type" : {
+ "type" : "record",
+ "name" : "secondLevel",
+ "fields": [
+ {
+ "name" : "f0",
+ "type" : {
+ "type" : "record",
+ "name" : "thirdLevel",
+ "fields" : [
+ {"name": "f1", "type": "int"},
+ {"name": "f0", "type": "string"},
+ {"name": "f2", "type": "boolean"}
+ ]
+ }
+ }
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc
new file mode 100644
index 000000000..fc1105911
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "f0", "type": ["string"]},
+ {"name": "f1", "type": ["string", "null"]},
+ {"name": "f2", "type": ["string", "int"]},
+ {"name": "f3", "type": "int"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc
new file mode 100644
index 000000000..308e027a2
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "f0", "type": ["string"]},
+ {"name": "f1", "type": ["string", "null"]},
+ {"name": "f3", "type": "int"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc
new file mode 100644
index 000000000..cbc83e566
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "f0", "type": ["string"]},
+ {"name": "f2", "type": ["string", "int"]},
+ {"name": "f3", "type": "int"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc
new file mode 100644
index 000000000..0f72fb432
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "f1", "type": ["string", "null"]},
+ {"name": "f2", "type": ["string", "int"]},
+ {"name": "f3", "type": ["string", "int"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test.avsc
new file mode 100644
index 000000000..92c0873de
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+ {"name": "name", "type": "string"},
+ {"name": "favorite_number", "type": ["int", "null"]},
+ {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_array.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_array.avsc
new file mode 100644
index 000000000..5b75a4031
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_array.avsc
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "array",
+ "items": "string",
+ "name": "testArray"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_fixed.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_fixed.avsc
new file mode 100644
index 000000000..a4d96e9ab
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_fixed.avsc
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "fixed",
+ "size": 6,
+ "name": "testFixed"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_large_data.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_large_data.avsc
new file mode 100644
index 000000000..f784ae623
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_large_data.avsc
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testLargeData",
+ "fields": [
+ {
+ "name": "f0",
+ "type": {
+ "name" : "f0",
+ "type" : "enum",
+ "symbols" : ["value1", "value2", "value3", "value4", "value5"]
+ }
+ },
+ {
+ "name" : "f1",
+ "type" : {
+ "type" : "record",
+ "name" : "nestedRecord",
+ "fields": [
+ {"name": "f1_0", "type": "string"},
+ {"name": "f1_1", "type": "int"}
+ ]
+ }
+ },
+
+ {"name": "f2", "type": "string"},
+ {"name": "f3", "type": "int"},
+ {"name": "f4", "type": "boolean"},
+ {"name": "f5", "type": "float"},
+ {"name": "f6", "type": "double"},
+ {"name": "f7", "type": "bytes"},
+ {"name": "f8", "type": ["string", "int"]},
+ {
+ "name": "f9",
+ "type": {
+ "name" : "f9",
+ "type" : "array",
+ "items" : "string"
+ }
+ },
+ {
+ "name": "f10",
+ "type": {
+ "name" : "f10",
+ "type" : "map",
+ "values" : "string"
+ }
+ },
+ {
+ "name": "f11",
+ "type": {
+ "type" : "fixed",
+ "name" : "f11",
+ "size" : 5
+ }
+ }
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_map.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_map.avsc
new file mode 100644
index 000000000..0dfa3a595
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_map.avsc
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "map",
+ "values": "string",
+ "name": "testMap"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc
new file mode 100644
index 000000000..29dddfd1a
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testNestedRecord",
+ "fields": [
+ {
+ "name" : "f0",
+ "type" : {
+ "type" : "record",
+ "name" : "nestedInRecord",
+ "fields": [
+ {"name": "f0", "type": "string"},
+ {"name": "f1", "type": "int"}
+ ]
+ }
+ }
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc
new file mode 100644
index 000000000..62af1a85d
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "nullableBoolean",
+ "fields": [
+ {"name": "f0", "type": ["null", "boolean"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc
new file mode 100644
index 000000000..002bc7ce2
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "nullableBytes",
+ "fields": [
+ {"name": "f0", "type": ["null", "bytes"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc
new file mode 100644
index 000000000..642b7aa16
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "nullableDouble",
+ "fields": [
+ {"name": "f0", "type": ["null", "double"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc
new file mode 100644
index 000000000..dff285909
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "nullableFloat",
+ "fields": [
+ {"name": "f0", "type": ["null", "float"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc
new file mode 100644
index 000000000..abb2fc48a
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "nullableInt",
+ "fields": [
+ {"name": "f0", "type": ["null", "int"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc
new file mode 100644
index 000000000..0624d2737
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "nullableLong",
+ "fields": [
+ {"name": "f0", "type": ["null", "long"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc
new file mode 100644
index 000000000..347808ce6
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "nullableString",
+ "fields": [
+ {"name": "f0", "type": ["null", "string"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc
new file mode 100644
index 000000000..af94812d7
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testNullableUnions",
+ "fields": [
+ {"name": "f0", "type": ["string", "int", "null"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc
new file mode 100644
index 000000000..7652ce723
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "boolean",
+ "name": "TestBoolean"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc
new file mode 100644
index 000000000..5102430b6
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "bytes",
+ "name": "TestBytes"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc
new file mode 100644
index 000000000..d1ae0b605
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "double",
+ "name": "TestDouble"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc
new file mode 100644
index 000000000..bd8df6102
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "enum",
+ "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"],
+ "name": "testEnum"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc
new file mode 100644
index 000000000..675d1090d
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "float",
+ "name": "TestFloat"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc
new file mode 100644
index 000000000..8fc848828
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "int",
+ "name": "TestInt"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc
new file mode 100644
index 000000000..b9706107c
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "long",
+ "name": "TestLong"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc
new file mode 100644
index 000000000..b4a89a7f6
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "string",
+ "name": "TestString"
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_record.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_record.avsc
new file mode 100644
index 000000000..e83cf1180
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_record.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testRecord",
+ "fields": [
+ {"name": "f0", "type": "string"},
+ {"name": "f1", "type": "int"},
+ {"name": "f2", "type": "boolean"}
+ ]
+}
diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_union.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_union.avsc
new file mode 100644
index 000000000..f181e36e3
--- /dev/null
+++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_union.avsc
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "testUnions",
+ "fields": [
+ {"name": "f0", "type": ["string", "int"]}
+ ]
+}
diff --git a/src/arrow/java/adapter/jdbc/pom.xml b/src/arrow/java/adapter/jdbc/pom.xml
new file mode 100644
index 000000000..a0e09fc0d
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/pom.xml
@@ -0,0 +1,96 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ <relativePath>../../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>arrow-jdbc</artifactId>
+ <name>Arrow JDBC Adapter</name>
+ <description>(Contrib/Experimental)A library for converting JDBC data to Arrow data.</description>
+ <url>http://maven.apache.org</url>
+
+ <dependencies>
+
+ <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory-core -->
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory-netty -->
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+
+ <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-vector -->
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+
+ <!-- https://mvnrepository.com/artifact/com.h2database/h2 -->
+ <dependency>
+ <groupId>com.h2database</groupId>
+ <artifactId>h2</artifactId>
+ <version>1.4.196</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.dataformat</groupId>
+ <artifactId>jackson-dataformat-yaml</artifactId>
+ <version>${dep.jackson.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ <version>${dep.jackson.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ <version>${dep.jackson.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>${dep.jackson.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-common</artifactId>
+ </dependency>
+
+ </dependencies>
+
+</project>
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java
new file mode 100644
index 000000000..0e833bcc2
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.isColumnNullable;
+
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.Iterator;
+
+import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * VectorSchemaRoot iterator for partially converting JDBC data.
+ */
+public class ArrowVectorIterator implements Iterator<VectorSchemaRoot>, AutoCloseable {
+
+ private final ResultSet resultSet;
+ private final JdbcToArrowConfig config;
+
+ private final Schema schema;
+ private final ResultSetMetaData rsmd;
+
+ private final JdbcConsumer[] consumers;
+ final CompositeJdbcConsumer compositeConsumer;
+
+ // this is used only if resuing vector schema root is enabled.
+ private VectorSchemaRoot nextBatch;
+
+ private final int targetBatchSize;
+
+ /**
+ * Construct an instance.
+ */
+ private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException {
+ this.resultSet = resultSet;
+ this.config = config;
+ this.schema = JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config);
+ this.targetBatchSize = config.getTargetBatchSize();
+
+ rsmd = resultSet.getMetaData();
+ consumers = new JdbcConsumer[rsmd.getColumnCount()];
+ this.compositeConsumer = new CompositeJdbcConsumer(consumers);
+ }
+
+ private void initialize() throws SQLException {
+ // create consumers
+ for (int i = 1; i <= consumers.length; i++) {
+ ArrowType arrowType = config.getJdbcToArrowTypeConverter()
+ .apply(new JdbcFieldInfo(resultSet.getMetaData(), i));
+ consumers[i - 1] = JdbcToArrowUtils.getConsumer(
+ arrowType, i, isColumnNullable(resultSet, i), null, config);
+ }
+
+ this.nextBatch = config.isReuseVectorSchemaRoot() ? createVectorSchemaRoot() : null;
+ }
+
+ /**
+ * Create a ArrowVectorIterator to partially convert data.
+ */
+ public static ArrowVectorIterator create(
+ ResultSet resultSet,
+ JdbcToArrowConfig config)
+ throws SQLException {
+ ArrowVectorIterator iterator = null;
+ try {
+ iterator = new ArrowVectorIterator(resultSet, config);
+ iterator.initialize();
+ } catch (Throwable e) {
+ if (iterator != null) {
+ iterator.close();
+ }
+ throw new RuntimeException("Error occurred while creating iterator.", e);
+ }
+ return iterator;
+ }
+
+ private void consumeData(VectorSchemaRoot root) {
+ // consume data
+ try {
+ int readRowCount = 0;
+ if (targetBatchSize == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
+ while (resultSet.next()) {
+ ValueVectorUtility.ensureCapacity(root, readRowCount + 1);
+ compositeConsumer.consume(resultSet);
+ readRowCount++;
+ }
+ } else {
+ while (readRowCount < targetBatchSize && resultSet.next()) {
+ compositeConsumer.consume(resultSet);
+ readRowCount++;
+ }
+ }
+
+ root.setRowCount(readRowCount);
+ } catch (Throwable e) {
+ compositeConsumer.close();
+ throw new RuntimeException("Error occurred while consuming data.", e);
+ }
+ }
+
+ private VectorSchemaRoot createVectorSchemaRoot() {
+ VectorSchemaRoot root = null;
+ try {
+ root = VectorSchemaRoot.create(schema, config.getAllocator());
+ if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
+ ValueVectorUtility.preAllocate(root, config.getTargetBatchSize());
+ }
+ } catch (Throwable e) {
+ if (root != null) {
+ root.close();
+ }
+ throw new RuntimeException("Error occurred while creating schema root.", e);
+ }
+ return root;
+ }
+
+ // Loads the next schema root or null if no more rows are available.
+ private void load(VectorSchemaRoot root) throws SQLException {
+ for (int i = 0; i < consumers.length; i++) {
+ consumers[i].resetValueVector(root.getVector(i));
+ }
+
+ consumeData(root);
+ }
+
+ @Override
+ public boolean hasNext() {
+ try {
+ return !resultSet.isAfterLast();
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Gets the next vector.
+ * If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false,
+ * the client is responsible for freeing its resources.
+ */
+ @Override
+ public VectorSchemaRoot next() {
+ Preconditions.checkArgument(hasNext());
+ try {
+ VectorSchemaRoot ret = config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot();
+ load(ret);
+ return ret;
+ } catch (Exception e) {
+ close();
+ throw new RuntimeException("Error occurred while getting next schema root.", e);
+ }
+ }
+
+ /**
+ * Clean up resources.
+ */
+ @Override
+ public void close() {
+ if (config.isReuseVectorSchemaRoot()) {
+ nextBatch.close();
+ }
+ compositeConsumer.close();
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java
new file mode 100644
index 000000000..aaadacb54
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+/**
+ * String constants used for metadata returned on Vectors.
+ */
+public class Constants {
+ private Constants() {}
+
+ public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME";
+ public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME";
+ public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME";
+ public static final String SQL_TYPE_KEY = "SQL_TYPE";
+
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java
new file mode 100644
index 000000000..e3747bb04
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Types;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * This class represents the information about a JDBC ResultSet Field that is
+ * needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}.
+ * Currently, this is:
+ * <ul>
+ * <li>The JDBC {@link java.sql.Types} type.</li>
+ * <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li>
+ * <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li>
+ * </ul>
+ */
+public class JdbcFieldInfo {
+ private final int jdbcType;
+ private final int precision;
+ private final int scale;
+
+ /**
+ * Builds a <code>JdbcFieldInfo</code> using only the {@link java.sql.Types} type. Do not use this constructor
+ * if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and
+ * scale will be set to <code>0</code>.
+ *
+ * @param jdbcType The {@link java.sql.Types} type.
+ * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}.
+ */
+ public JdbcFieldInfo(int jdbcType) {
+ Preconditions.checkArgument(
+ (jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC),
+ "DECIMAL and NUMERIC types require a precision and scale; please use another constructor.");
+
+ this.jdbcType = jdbcType;
+ this.precision = 0;
+ this.scale = 0;
+ }
+
+ /**
+ * Builds a <code>JdbcFieldInfo</code> from the {@link java.sql.Types} type, precision, and scale.
+ * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types.
+ *
+ * @param jdbcType The {@link java.sql.Types} type.
+ * @param precision The field's numeric precision.
+ * @param scale The field's numeric scale.
+ */
+ public JdbcFieldInfo(int jdbcType, int precision, int scale) {
+ this.jdbcType = jdbcType;
+ this.precision = precision;
+ this.scale = scale;
+ }
+
+ /**
+ * Builds a <code>JdbcFieldInfo</code> from the corresponding {@link java.sql.ResultSetMetaData} column.
+ *
+ * @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from.
+ * @param column The column to get the field information for (on a 1-based index).
+ * @throws SQLException If the column information cannot be retrieved.
+ * @throws NullPointerException if <code>rsmd</code> is <code>null</code>.
+ * @throws IllegalArgumentException if <code>column</code> is out of bounds.
+ */
+ public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException {
+ Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null.");
+ Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1.");
+ Preconditions.checkArgument(
+ column <= rsmd.getColumnCount(),
+ "The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount());
+
+ this.jdbcType = rsmd.getColumnType(column);
+ this.precision = rsmd.getPrecision(column);
+ this.scale = rsmd.getScale(column);
+ }
+
+ /**
+ * The {@link java.sql.Types} type.
+ */
+ public int getJdbcType() {
+ return jdbcType;
+ }
+
+ /**
+ * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
+ */
+ public int getPrecision() {
+ return precision;
+ }
+
+ /**
+ * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
+ */
+ public int getScale() {
+ return scale;
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
new file mode 100644
index 000000000..daee64d93
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import java.io.IOException;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Utility class to convert JDBC objects to columnar Arrow format objects.
+ *
+ * <p>This utility uses following data mapping to map JDBC/SQL datatype to Arrow data types.
+ *
+ * <p>CHAR --> ArrowType.Utf8
+ * NCHAR --> ArrowType.Utf8
+ * VARCHAR --> ArrowType.Utf8
+ * NVARCHAR --> ArrowType.Utf8
+ * LONGVARCHAR --> ArrowType.Utf8
+ * LONGNVARCHAR --> ArrowType.Utf8
+ * NUMERIC --> ArrowType.Decimal(precision, scale)
+ * DECIMAL --> ArrowType.Decimal(precision, scale)
+ * BIT --> ArrowType.Bool
+ * TINYINT --> ArrowType.Int(8, signed)
+ * SMALLINT --> ArrowType.Int(16, signed)
+ * INTEGER --> ArrowType.Int(32, signed)
+ * BIGINT --> ArrowType.Int(64, signed)
+ * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+ * BINARY --> ArrowType.Binary
+ * VARBINARY --> ArrowType.Binary
+ * LONGVARBINARY --> ArrowType.Binary
+ * DATE --> ArrowType.Date(DateUnit.MILLISECOND)
+ * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
+ * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null)
+ * CLOB --> ArrowType.Utf8
+ * BLOB --> ArrowType.Binary
+ *
+ * @since 0.10.0
+ */
+public class JdbcToArrow {
+
+ /*----------------------------------------------------------------*
+ | |
+ | Partial Convert API |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+ * Note here uses the default targetBatchSize = 1024.
+ *
+ * @param resultSet ResultSet to use to fetch the data from underlying database
+ * @param allocator Memory allocator
+ * @return Arrow Data Objects {@link ArrowVectorIterator}
+ * @throws SQLException on error
+ */
+ public static ArrowVectorIterator sqlToArrowVectorIterator(
+ ResultSet resultSet,
+ BufferAllocator allocator)
+ throws SQLException, IOException {
+ Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
+
+ JdbcToArrowConfig config =
+ new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
+ return sqlToArrowVectorIterator(resultSet, config);
+ }
+
+ /**
+ * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+ * Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value 1024.
+ * @param resultSet ResultSet to use to fetch the data from underlying database
+ * @param config Configuration of the conversion from JDBC to Arrow.
+ * @return Arrow Data Objects {@link ArrowVectorIterator}
+ * @throws SQLException on error
+ */
+ public static ArrowVectorIterator sqlToArrowVectorIterator(
+ ResultSet resultSet,
+ JdbcToArrowConfig config)
+ throws SQLException, IOException {
+ Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
+ Preconditions.checkNotNull(config, "The configuration cannot be null");
+ return ArrowVectorIterator.create(resultSet, config);
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
new file mode 100644
index 000000000..250b0edd2
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
+import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
+
+import java.sql.Types;
+import java.util.Calendar;
+import java.util.Map;
+import java.util.function.Function;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+/**
+ * This class configures the JDBC-to-Arrow conversion process.
+ * <p>
+ * The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
+ * and the calendar is used to define the time zone of any
+ * {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp}
+ * fields that are created during the conversion. Neither field may be <code>null</code>.
+ * </p>
+ * <p>
+ * If the <code>includeMetadata</code> flag is set, the Arrow field metadata will contain information
+ * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the
+ * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding
+ * {@link org.apache.arrow.vector.FieldVector}.
+ * </p>
+ * <p>
+ * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding
+ * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type
+ * information cannot be retrieved from all JDBC implementations (H2 for example, returns
+ * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index
+ * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion.
+ * </p>
+ */
+public final class JdbcToArrowConfig {
+
+ private final Calendar calendar;
+ private final BufferAllocator allocator;
+ private final boolean includeMetadata;
+ private final boolean reuseVectorSchemaRoot;
+ private final Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
+ private final Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
+
+ public static final int DEFAULT_TARGET_BATCH_SIZE = 1024;
+ public static final int NO_LIMIT_BATCH_SIZE = -1;
+
+ /**
+ * The maximum rowCount to read each time when partially convert data.
+ * Default value is 1024 and -1 means disable partial read.
+ * default is -1 which means disable partial read.
+ * Note that this flag only useful for {@link JdbcToArrow#sqlToArrowVectorIterator}
+ * 1) if targetBatchSize != -1, it will convert full data into multiple vectors
+ * with valueCount no more than targetBatchSize.
+ * 2) if targetBatchSize == -1, it will convert full data into a single vector in {@link ArrowVectorIterator}
+ * </p>
+ */
+ private final int targetBatchSize;
+
+ private final Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter;
+
+ /**
+ * Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
+ * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
+ * Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>.
+ *
+ * @param allocator The memory allocator to construct the Arrow vectors with.
+ * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
+ */
+ JdbcToArrowConfig(BufferAllocator allocator, Calendar calendar) {
+ this(allocator, calendar,
+ /* include metadata */ false,
+ /* reuse vector schema root */ false,
+ /* array sub-types by column index */ null,
+ /* array sub-types by column name */ null,
+ DEFAULT_TARGET_BATCH_SIZE, null);
+ }
+
+ /**
+ * Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
+ * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
+ * Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>.
+ *
+ * @param allocator The memory allocator to construct the Arrow vectors with.
+ * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
+ * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
+ * @param reuseVectorSchemaRoot Whether to reuse the vector schema root for each data load.
+ * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based).
+ * @param arraySubTypesByColumnName The type of the JDBC array at the column name.
+ * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow type. If set to null,
+ * the default mapping will be used, which is defined as:
+ * <ul>
+ * <li>CHAR --> ArrowType.Utf8</li>
+ * <li>NCHAR --> ArrowType.Utf8</li>
+ * <li>VARCHAR --> ArrowType.Utf8</li>
+ * <li>NVARCHAR --> ArrowType.Utf8</li>
+ * <li>LONGVARCHAR --> ArrowType.Utf8</li>
+ * <li>LONGNVARCHAR --> ArrowType.Utf8</li>
+ * <li>NUMERIC --> ArrowType.Decimal(precision, scale)</li>
+ * <li>DECIMAL --> ArrowType.Decimal(precision, scale)</li>
+ * <li>BIT --> ArrowType.Bool</li>
+ * <li>TINYINT --> ArrowType.Int(8, signed)</li>
+ * <li>SMALLINT --> ArrowType.Int(16, signed)</li>
+ * <li>INTEGER --> ArrowType.Int(32, signed)</li>
+ * <li>BIGINT --> ArrowType.Int(64, signed)</li>
+ * <li>REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
+ * <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
+ * <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li>
+ * <li>BINARY --> ArrowType.Binary</li>
+ * <li>VARBINARY --> ArrowType.Binary</li>
+ * <li>LONGVARBINARY --> ArrowType.Binary</li>
+ * <li>DATE --> ArrowType.Date(DateUnit.DAY)</li>
+ * <li>TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li>
+ * <li>TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)</li>
+ * <li>CLOB --> ArrowType.Utf8</li>
+ * <li>BLOB --> ArrowType.Binary</li>
+ * <li>NULL --> ArrowType.Null</li>
+ * </ul>
+ */
+ JdbcToArrowConfig(
+ BufferAllocator allocator,
+ Calendar calendar,
+ boolean includeMetadata,
+ boolean reuseVectorSchemaRoot,
+ Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex,
+ Map<String, JdbcFieldInfo> arraySubTypesByColumnName,
+ int targetBatchSize,
+ Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
+ Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
+ this.allocator = allocator;
+ this.calendar = calendar;
+ this.includeMetadata = includeMetadata;
+ this.reuseVectorSchemaRoot = reuseVectorSchemaRoot;
+ this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex;
+ this.arraySubTypesByColumnName = arraySubTypesByColumnName;
+ this.targetBatchSize = targetBatchSize;
+
+ // set up type converter
+ this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter != null ? jdbcToArrowTypeConverter :
+ fieldInfo -> {
+ final String timezone;
+ if (calendar != null) {
+ timezone = calendar.getTimeZone().getID();
+ } else {
+ timezone = null;
+ }
+
+ switch (fieldInfo.getJdbcType()) {
+ case Types.BOOLEAN:
+ case Types.BIT:
+ return new ArrowType.Bool();
+ case Types.TINYINT:
+ return new ArrowType.Int(8, true);
+ case Types.SMALLINT:
+ return new ArrowType.Int(16, true);
+ case Types.INTEGER:
+ return new ArrowType.Int(32, true);
+ case Types.BIGINT:
+ return new ArrowType.Int(64, true);
+ case Types.NUMERIC:
+ case Types.DECIMAL:
+ int precision = fieldInfo.getPrecision();
+ int scale = fieldInfo.getScale();
+ return new ArrowType.Decimal(precision, scale, 128);
+ case Types.REAL:
+ case Types.FLOAT:
+ return new ArrowType.FloatingPoint(SINGLE);
+ case Types.DOUBLE:
+ return new ArrowType.FloatingPoint(DOUBLE);
+ case Types.CHAR:
+ case Types.NCHAR:
+ case Types.VARCHAR:
+ case Types.NVARCHAR:
+ case Types.LONGVARCHAR:
+ case Types.LONGNVARCHAR:
+ case Types.CLOB:
+ return new ArrowType.Utf8();
+ case Types.DATE:
+ return new ArrowType.Date(DateUnit.DAY);
+ case Types.TIME:
+ return new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ case Types.TIMESTAMP:
+ return new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone);
+ case Types.BINARY:
+ case Types.VARBINARY:
+ case Types.LONGVARBINARY:
+ case Types.BLOB:
+ return new ArrowType.Binary();
+ case Types.ARRAY:
+ return new ArrowType.List();
+ case Types.NULL:
+ return new ArrowType.Null();
+ default:
+ // no-op, shouldn't get here
+ return null;
+ }
+ };
+ }
+
+ /**
+ * The calendar to use when defining Arrow Timestamp fields
+ * and retrieving {@link java.sql.Date}, {@link java.sql.Time}, or {@link java.sql.Timestamp}
+ * data types from the {@link java.sql.ResultSet}, or <code>null</code> if not converting.
+ *
+ * @return the calendar.
+ */
+ public Calendar getCalendar() {
+ return calendar;
+ }
+
+ /**
+ * The Arrow memory allocator.
+ * @return the allocator.
+ */
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ /**
+ * Whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata.
+ *
+ * @return <code>true</code> to include field metadata, <code>false</code> to exclude it.
+ */
+ public boolean shouldIncludeMetadata() {
+ return includeMetadata;
+ }
+
+ /**
+ * Get the target batch size for partial read.
+ */
+ public int getTargetBatchSize() {
+ return targetBatchSize;
+ }
+
+ /**
+ * Get whether it is allowed to reuse the vector schema root.
+ */
+ public boolean isReuseVectorSchemaRoot() {
+ return reuseVectorSchemaRoot;
+ }
+
+ /**
+ * Gets the mapping between JDBC type information to Arrow type.
+ */
+ public Function<JdbcFieldInfo, ArrowType> getJdbcToArrowTypeConverter() {
+ return jdbcToArrowTypeConverter;
+ }
+
+ /**
+ * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index.
+ *
+ * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type.
+ * @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined.
+ */
+ public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) {
+ if (arraySubTypesByColumnIndex == null) {
+ return null;
+ } else {
+ return arraySubTypesByColumnIndex.get(index);
+ }
+ }
+
+ /**
+ * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name.
+ *
+ * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type.
+ * @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined.
+ */
+ public JdbcFieldInfo getArraySubTypeByColumnName(String name) {
+ if (arraySubTypesByColumnName == null) {
+ return null;
+ } else {
+ return arraySubTypesByColumnName.get(name);
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
new file mode 100644
index 000000000..3941d978f
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE;
+
+import java.util.Calendar;
+import java.util.Map;
+import java.util.function.Function;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+/**
+ * This class builds {@link JdbcToArrowConfig}s.
+ */
+public class JdbcToArrowConfigBuilder {
+ private Calendar calendar;
+ private BufferAllocator allocator;
+ private boolean includeMetadata;
+ private boolean reuseVectorSchemaRoot;
+ private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
+ private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
+
+ private int targetBatchSize;
+ private Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter;
+
+ /**
+ * Default constructor for the <code>JdbcToArrowConfigBuilder}</code>.
+ * Use the setter methods for the allocator and calendar; the allocator must be
+ * set. Otherwise, {@link #build()} will throw a {@link NullPointerException}.
+ */
+ public JdbcToArrowConfigBuilder() {
+ this.allocator = null;
+ this.calendar = null;
+ this.includeMetadata = false;
+ this.reuseVectorSchemaRoot = false;
+ this.arraySubTypesByColumnIndex = null;
+ this.arraySubTypesByColumnName = null;
+ }
+
+ /**
+ * Constructor for the <code>JdbcToArrowConfigBuilder</code>. The
+ * allocator is required, and a {@link NullPointerException}
+ * will be thrown if it is <code>null</code>.
+ * <p>
+ * The allocator is used to construct Arrow vectors from the JDBC ResultSet.
+ * The calendar is used to determine the time zone of {@link java.sql.Timestamp}
+ * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and
+ * {@link java.sql.Timestamp} fields to a single, common time zone when reading
+ * from the result set.
+ * </p>
+ *
+ * @param allocator The Arrow Vector memory allocator.
+ * @param calendar The calendar to use when constructing timestamp fields.
+ */
+ public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar) {
+ this();
+
+ Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
+
+ this.allocator = allocator;
+ this.calendar = calendar;
+ this.includeMetadata = false;
+ this.reuseVectorSchemaRoot = false;
+ this.targetBatchSize = DEFAULT_TARGET_BATCH_SIZE;
+ }
+
+ /**
+ * Constructor for the <code>JdbcToArrowConfigBuilder</code>. Both the
+ * allocator and calendar are required. A {@link NullPointerException}
+ * will be thrown if either of those arguments is <code>null</code>.
+ * <p>
+ * The allocator is used to construct Arrow vectors from the JDBC ResultSet.
+ * The calendar is used to determine the time zone of {@link java.sql.Timestamp}
+ * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and
+ * {@link java.sql.Timestamp} fields to a single, common time zone when reading
+ * from the result set.
+ * </p>
+ * <p>
+ * The <code>includeMetadata</code> argument, if <code>true</code> will cause
+ * various information about each database field to be added to the Vector
+ * Schema's field metadata.
+ * </p>
+ *
+ * @param allocator The Arrow Vector memory allocator.
+ * @param calendar The calendar to use when constructing timestamp fields.
+ */
+ public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar, boolean includeMetadata) {
+ this(allocator, calendar);
+ this.includeMetadata = includeMetadata;
+ }
+
+ /**
+ * Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet.
+ *
+ * @param allocator the allocator to set.
+ * @exception NullPointerException if <code>allocator</code> is null.
+ */
+ public JdbcToArrowConfigBuilder setAllocator(BufferAllocator allocator) {
+ Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
+ this.allocator = allocator;
+ return this;
+ }
+
+ /**
+ * Sets the {@link Calendar} to use when constructing timestamp fields in the
+ * Arrow schema, and reading time-based fields from the JDBC <code>ResultSet</code>.
+ *
+ * @param calendar the calendar to set.
+ */
+ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) {
+ this.calendar = calendar;
+ return this;
+ }
+
+ /**
+ * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata.
+ *
+ * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata.
+ * @return This instance of the <code>JdbcToArrowConfig</code>, for chaining.
+ */
+ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
+ this.includeMetadata = includeMetadata;
+ return this;
+ }
+
+ /**
+ * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
+ * The column index is 1-based, to match the JDBC column index.
+ *
+ * @param map The mapping.
+ * @return This instance of the <code>JdbcToArrowConfig</code>, for chaining.
+ */
+ public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map<Integer, JdbcFieldInfo> map) {
+ this.arraySubTypesByColumnIndex = map;
+ return this;
+ }
+
+ /**
+ * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
+ *
+ * @param map The mapping.
+ * @return This instance of the <code>JdbcToArrowConfig</code>, for chaining.
+ */
+ public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map<String, JdbcFieldInfo> map) {
+ this.arraySubTypesByColumnName = map;
+ return this;
+ }
+
+ public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) {
+ this.targetBatchSize = targetBatchSize;
+ return this;
+ }
+
+ public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter(
+ Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
+ this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter;
+ return this;
+ }
+
+ public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) {
+ this.reuseVectorSchemaRoot = reuseVectorSchemaRoot;
+ return this;
+ }
+
+ /**
+ * This builds the {@link JdbcToArrowConfig} from the provided
+ * {@link BufferAllocator} and {@link Calendar}.
+ *
+ * @return The built {@link JdbcToArrowConfig}
+ * @throws NullPointerException if either the allocator or calendar was not set.
+ */
+ public JdbcToArrowConfig build() {
+ return new JdbcToArrowConfig(
+ allocator,
+ calendar,
+ includeMetadata,
+ reuseVectorSchemaRoot,
+ arraySubTypesByColumnIndex,
+ arraySubTypesByColumnName,
+ targetBatchSize,
+ jdbcToArrowTypeConverter);
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
new file mode 100644
index 000000000..e05f21d48
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import java.io.IOException;
+import java.sql.Date;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TimeZone;
+
+import org.apache.arrow.adapter.jdbc.consumer.ArrayConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.BitConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.DateConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.DecimalConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.DoubleConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.FloatConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.IntConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.NullConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.SmallIntConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.TimeConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.TimestampConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.TimestampTZConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.TinyIntConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.VarCharConsumer;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector objects.
+ *
+ * @since 0.10.0
+ */
+public class JdbcToArrowUtils {
+
+ private static final int JDBC_ARRAY_VALUE_COLUMN = 2;
+
+ /**
+ * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale.
+ */
+ public static Calendar getUtcCalendar() {
+ return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
+ }
+
+ /**
+ * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
+ *
+ * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
+ * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from.
+ * @return {@link Schema}
+ * @throws SQLException on error
+ */
+ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {
+ Preconditions.checkNotNull(calendar, "Calendar object can't be null");
+
+ return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar));
+ }
+
+ /**
+ * Create Arrow {@link Schema} object for the given JDBC {@link java.sql.ResultSetMetaData}.
+ *
+ * <p>
+ * If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns <code>true</code>, the following fields
+ * will be added to the {@link FieldType#getMetadata()}:
+ * <ul>
+ * <li>{@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}</li>
+ * <li>{@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}</li>
+ * <li>{@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnLabel(int)}</li>
+ * <li>{@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}</li>
+ * </ul>
+ * </p>
+ * <p>
+ * If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be used to look up
+ * the array sub-type field. The {@link JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be
+ * checked first, followed by the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method.
+ * </p>
+ *
+ * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
+ * @param config The configuration to use when constructing the schema.
+ * @return {@link Schema}
+ * @throws SQLException on error
+ * @throws IllegalArgumentException if <code>rsmd</code> contains an {@link java.sql.Types#ARRAY} but the
+ * <code>config</code> does not have a sub-type definition for it.
+ */
+ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException {
+ Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
+ Preconditions.checkNotNull(config, "The configuration object must not be null");
+
+ List<Field> fields = new ArrayList<>();
+ int columnCount = rsmd.getColumnCount();
+ for (int i = 1; i <= columnCount; i++) {
+ final String columnName = rsmd.getColumnLabel(i);
+
+ final Map<String, String> metadata;
+ if (config.shouldIncludeMetadata()) {
+ metadata = new HashMap<>();
+ metadata.put(Constants.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i));
+ metadata.put(Constants.SQL_TABLE_NAME_KEY, rsmd.getTableName(i));
+ metadata.put(Constants.SQL_COLUMN_NAME_KEY, columnName);
+ metadata.put(Constants.SQL_TYPE_KEY, rsmd.getColumnTypeName(i));
+
+ } else {
+ metadata = null;
+ }
+
+ final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(new JdbcFieldInfo(rsmd, i));
+ if (arrowType != null) {
+ final FieldType fieldType = new FieldType(
+ isColumnNullable(rsmd, i), arrowType, /* dictionary encoding */ null, metadata);
+
+ List<Field> children = null;
+ if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) {
+ final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config);
+ if (arrayFieldInfo == null) {
+ throw new IllegalArgumentException("Configuration does not provide a mapping for array column " + i);
+ }
+ children = new ArrayList<Field>();
+ final ArrowType childType = config.getJdbcToArrowTypeConverter().apply(arrayFieldInfo);
+ children.add(new Field("child", FieldType.nullable(childType), null));
+ }
+
+ fields.add(new Field(columnName, fieldType, children));
+ }
+ }
+
+ return new Schema(fields, null);
+ }
+
+ /* Uses the configuration to determine what the array sub-type JdbcFieldInfo is.
+ * If no sub-type can be found, returns null.
+ */
+ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType(
+ ResultSetMetaData rsmd,
+ int arrayColumn,
+ JdbcToArrowConfig config)
+ throws SQLException {
+
+ Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null");
+ Preconditions.checkNotNull(config, "Configuration must not be null");
+ Preconditions.checkArgument(
+ arrayColumn > 0,
+ "ResultSetMetaData columns start with 1; column cannot be less than 1");
+ Preconditions.checkArgument(
+ arrayColumn <= rsmd.getColumnCount(),
+ "Column number cannot be more than the number of columns");
+
+ JdbcFieldInfo fieldInfo = config.getArraySubTypeByColumnIndex(arrayColumn);
+ if (fieldInfo == null) {
+ fieldInfo = config.getArraySubTypeByColumnName(rsmd.getColumnLabel(arrayColumn));
+ }
+ return fieldInfo;
+ }
+
+ /**
+ * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
+ * the given Arrow Vector objects.
+ *
+ * @param rs ResultSet to use to fetch the data from underlying database
+ * @param root Arrow {@link VectorSchemaRoot} object to populate
+ * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link Timestamp}
+ * data types from the {@link ResultSet}, or <code>null</code> if not converting.
+ * @throws SQLException on error
+ */
+ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar)
+ throws SQLException, IOException {
+
+ Preconditions.checkNotNull(calendar, "Calendar object can't be null");
+
+ jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar));
+ }
+
+ static boolean isColumnNullable(ResultSet resultSet, int index) throws SQLException {
+ return isColumnNullable(resultSet.getMetaData(), index);
+ }
+
+ static boolean isColumnNullable(ResultSetMetaData resultSetMetadata, int index) throws SQLException {
+ int nullableValue = resultSetMetadata.isNullable(index);
+ return nullableValue == ResultSetMetaData.columnNullable ||
+ nullableValue == ResultSetMetaData.columnNullableUnknown;
+ }
+
+ /**
+ * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
+ * the given Arrow Vector objects.
+ *
+ * @param rs ResultSet to use to fetch the data from underlying database
+ * @param root Arrow {@link VectorSchemaRoot} object to populate
+ * @param config The configuration to use when reading the data.
+ * @throws SQLException on error
+ */
+ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config)
+ throws SQLException, IOException {
+
+ ResultSetMetaData rsmd = rs.getMetaData();
+ int columnCount = rsmd.getColumnCount();
+
+ JdbcConsumer[] consumers = new JdbcConsumer[columnCount];
+ for (int i = 1; i <= columnCount; i++) {
+ FieldVector vector = root.getVector(rsmd.getColumnLabel(i));
+ consumers[i - 1] = getConsumer(vector.getField().getType(), i, isColumnNullable(rs, i), vector, config);
+ }
+
+ CompositeJdbcConsumer compositeConsumer = null;
+ // Only clean resources when occurs error,
+ // vectors within consumers are useful and users are responsible for its close.
+ try {
+ compositeConsumer = new CompositeJdbcConsumer(consumers);
+ int readRowCount = 0;
+ if (config.getTargetBatchSize() == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
+ while (rs.next()) {
+ ValueVectorUtility.ensureCapacity(root, readRowCount + 1);
+ compositeConsumer.consume(rs);
+ readRowCount++;
+ }
+ } else {
+ while (rs.next() && readRowCount < config.getTargetBatchSize()) {
+ compositeConsumer.consume(rs);
+ readRowCount++;
+ }
+ }
+
+ root.setRowCount(readRowCount);
+ } catch (Exception e) {
+ // error occurs and clean up resources.
+ if (compositeConsumer != null) {
+ compositeConsumer.close();
+ }
+ throw e;
+ }
+ }
+
+ static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boolean nullable,
+ FieldVector vector, JdbcToArrowConfig config) {
+ final Calendar calendar = config.getCalendar();
+
+ switch (arrowType.getTypeID()) {
+ case Bool:
+ return BitConsumer.createConsumer((BitVector) vector, columnIndex, nullable);
+ case Int:
+ switch (((ArrowType.Int) arrowType).getBitWidth()) {
+ case 8:
+ return TinyIntConsumer.createConsumer((TinyIntVector) vector, columnIndex, nullable);
+ case 16:
+ return SmallIntConsumer.createConsumer((SmallIntVector) vector, columnIndex, nullable);
+ case 32:
+ return IntConsumer.createConsumer((IntVector) vector, columnIndex, nullable);
+ case 64:
+ return BigIntConsumer.createConsumer((BigIntVector) vector, columnIndex, nullable);
+ default:
+ return null;
+ }
+ case Decimal:
+ return DecimalConsumer.createConsumer((DecimalVector) vector, columnIndex, nullable);
+ case FloatingPoint:
+ switch (((ArrowType.FloatingPoint) arrowType).getPrecision()) {
+ case SINGLE:
+ return FloatConsumer.createConsumer((Float4Vector) vector, columnIndex, nullable);
+ case DOUBLE:
+ return DoubleConsumer.createConsumer((Float8Vector) vector, columnIndex, nullable);
+ default:
+ return null;
+ }
+ case Utf8:
+ case LargeUtf8:
+ return VarCharConsumer.createConsumer((VarCharVector) vector, columnIndex, nullable);
+ case Binary:
+ case LargeBinary:
+ return BinaryConsumer.createConsumer((VarBinaryVector) vector, columnIndex, nullable);
+ case Date:
+ return DateConsumer.createConsumer((DateDayVector) vector, columnIndex, nullable, calendar);
+ case Time:
+ return TimeConsumer.createConsumer((TimeMilliVector) vector, columnIndex, nullable, calendar);
+ case Timestamp:
+ if (config.getCalendar() == null) {
+ return TimestampConsumer.createConsumer((TimeStampMilliVector) vector, columnIndex, nullable);
+ } else {
+ return TimestampTZConsumer.createConsumer((TimeStampMilliTZVector) vector, columnIndex, nullable, calendar);
+ }
+ case List:
+ FieldVector childVector = ((ListVector) vector).getDataVector();
+ JdbcConsumer delegate = getConsumer(childVector.getField().getType(), JDBC_ARRAY_VALUE_COLUMN,
+ childVector.getField().isNullable(), childVector, config);
+ return ArrayConsumer.createConsumer((ListVector) vector, delegate, columnIndex, nullable);
+ case Null:
+ return new NullConsumer((NullVector) vector);
+ default:
+ // no-op, shouldn't get here
+ throw new UnsupportedOperationException();
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java
new file mode 100644
index 000000000..ed12f818a
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.io.IOException;
+import java.sql.Array;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.ListVector;
+
+/**
+ * Consumer which consume array type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.complex.ListVector}.
+ */
+public abstract class ArrayConsumer extends BaseConsumer<ListVector> {
+
+ /**
+ * Creates a consumer for {@link ListVector}.
+ */
+ public static ArrayConsumer createConsumer(
+ ListVector vector, JdbcConsumer delegate, int index, boolean nullable) {
+ if (nullable) {
+ return new ArrayConsumer.NullableArrayConsumer(vector, delegate, index);
+ } else {
+ return new ArrayConsumer.NonNullableArrayConsumer(vector, delegate, index);
+ }
+ }
+
+ protected final JdbcConsumer delegate;
+
+ private final ValueVector innerVector;
+
+ protected int innerVectorIndex = 0;
+
+ /**
+ * Instantiate a ArrayConsumer.
+ */
+ public ArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) {
+ super(vector, index);
+ this.delegate = delegate;
+ this.innerVector = vector.getDataVector();
+ }
+
+ @Override
+ public void close() throws Exception {
+ this.vector.close();
+ this.delegate.close();
+ }
+
+ void ensureInnerVectorCapacity(int targetCapacity) {
+ while (innerVector.getValueCapacity() < targetCapacity) {
+ innerVector.reAlloc();
+ }
+ }
+
+ /**
+ * Nullable consumer for {@link ListVector}.
+ */
+ static class NullableArrayConsumer extends ArrayConsumer {
+
+ /**
+ * Instantiate a nullable array consumer.
+ */
+ public NullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) {
+ super(vector, delegate, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException, IOException {
+ final Array array = resultSet.getArray(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ vector.startNewValue(currentIndex);
+ int count = 0;
+ try (ResultSet rs = array.getResultSet()) {
+ while (rs.next()) {
+ ensureInnerVectorCapacity(innerVectorIndex + count + 1);
+ delegate.consume(rs);
+ count++;
+ }
+ }
+ vector.endValue(currentIndex, count);
+ innerVectorIndex += count;
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for {@link ListVector}.
+ */
+ static class NonNullableArrayConsumer extends ArrayConsumer {
+
+ /**
+ * Instantiate a nullable array consumer.
+ */
+ public NonNullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) {
+ super(vector, delegate, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException, IOException {
+ final Array array = resultSet.getArray(columnIndexInResultSet);
+ vector.startNewValue(currentIndex);
+ int count = 0;
+ try (ResultSet rs = array.getResultSet()) {
+ while (rs.next()) {
+ ensureInnerVectorCapacity(innerVectorIndex + count + 1);
+ delegate.consume(rs);
+ count++;
+ }
+ }
+ vector.endValue(currentIndex, count);
+ innerVectorIndex += count;
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java
new file mode 100644
index 000000000..2db128d3e
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Base class for all consumers.
+ * @param <V> vector type.
+ */
+public abstract class BaseConsumer<V extends ValueVector> implements JdbcConsumer<V> {
+
+ protected V vector;
+
+ protected final int columnIndexInResultSet;
+
+ protected int currentIndex;
+
+ /**
+ * Constructs a new consumer.
+ * @param vector the underlying vector for the consumer.
+ * @param index the column id for the consumer.
+ */
+ public BaseConsumer(V vector, int index) {
+ this.vector = vector;
+ this.columnIndexInResultSet = index;
+ }
+
+ @Override
+ public void close() throws Exception {
+ this.vector.close();
+ }
+
+ @Override
+ public void resetValueVector(V vector) {
+ this.vector = vector;
+ this.currentIndex = 0;
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java
new file mode 100644
index 000000000..19c8efa91
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.BigIntVector;
+
+/**
+ * Consumer which consume bigint type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.BigIntVector}.
+ */
+public class BigIntConsumer {
+
+ /**
+ * Creates a consumer for {@link BigIntVector}.
+ */
+ public static JdbcConsumer<BigIntVector> createConsumer(BigIntVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableBigIntConsumer(vector, index);
+ } else {
+ return new NonNullableBigIntConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Nullable consumer for big int.
+ */
+ static class NullableBigIntConsumer extends BaseConsumer<BigIntVector> {
+
+ /**
+ * Instantiate a BigIntConsumer.
+ */
+ public NullableBigIntConsumer(BigIntVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ long value = resultSet.getLong(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for big int.
+ */
+ static class NonNullableBigIntConsumer extends BaseConsumer<BigIntVector> {
+
+ /**
+ * Instantiate a BigIntConsumer.
+ */
+ public NonNullableBigIntConsumer(BigIntVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ long value = resultSet.getLong(columnIndexInResultSet);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java
new file mode 100644
index 000000000..8c5f61169
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.VarBinaryVector;
+
+/**
+ * Consumer which consume binary type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.VarBinaryVector}.
+ */
+public abstract class BinaryConsumer extends BaseConsumer<VarBinaryVector> {
+
+ /**
+ * Creates a consumer for {@link VarBinaryVector}.
+ */
+ public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableBinaryConsumer(vector, index);
+ } else {
+ return new NonNullableBinaryConsumer(vector, index);
+ }
+ }
+
+ private final byte[] reuseBytes = new byte[1024];
+
+ /**
+ * Instantiate a BinaryConsumer.
+ */
+ public BinaryConsumer(VarBinaryVector vector, int index) {
+ super(vector, index);
+ if (vector != null) {
+ vector.allocateNewSafe();
+ }
+ }
+
+ /**
+ * consume a InputStream.
+ */
+ public void consume(InputStream is) throws IOException {
+ if (is != null) {
+ while (currentIndex >= vector.getValueCapacity()) {
+ vector.reallocValidityAndOffsetBuffers();
+ }
+ final int startOffset = vector.getStartOffset(currentIndex);
+ final ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+ int dataLength = 0;
+ int read;
+ while ((read = is.read(reuseBytes)) != -1) {
+ while (vector.getDataBuffer().capacity() < (startOffset + dataLength + read)) {
+ vector.reallocDataBuffer();
+ }
+ vector.getDataBuffer().setBytes(startOffset + dataLength, reuseBytes, 0, read);
+ dataLength += read;
+ }
+ offsetBuffer.setInt((currentIndex + 1) * VarBinaryVector.OFFSET_WIDTH, startOffset + dataLength);
+ BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex);
+ vector.setLastSet(currentIndex);
+ }
+ }
+
+ public void moveWriterPosition() {
+ currentIndex++;
+ }
+
+ @Override
+ public void resetValueVector(VarBinaryVector vector) {
+ this.vector = vector;
+ this.vector.allocateNewSafe();
+ this.currentIndex = 0;
+ }
+
+ /**
+ * Consumer for nullable binary data.
+ */
+ static class NullableBinaryConsumer extends BinaryConsumer {
+
+ /**
+ * Instantiate a BinaryConsumer.
+ */
+ public NullableBinaryConsumer(VarBinaryVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException, IOException {
+ InputStream is = resultSet.getBinaryStream(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ consume(is);
+ }
+ moveWriterPosition();
+ }
+ }
+
+ /**
+ * Consumer for non-nullable binary data.
+ */
+ static class NonNullableBinaryConsumer extends BinaryConsumer {
+
+ /**
+ * Instantiate a BinaryConsumer.
+ */
+ public NonNullableBinaryConsumer(VarBinaryVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException, IOException {
+ InputStream is = resultSet.getBinaryStream(columnIndexInResultSet);
+ consume(is);
+ moveWriterPosition();
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java
new file mode 100644
index 000000000..d2d94d0a4
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.BitVector;
+
+/**
+ * Consumer which consume bit type values from {@link ResultSet}.
+ * Write the data to {@link BitVector}.
+ */
+public class BitConsumer {
+
+ /**
+ * Creates a consumer for {@link BitVector}.
+ */
+ public static JdbcConsumer<BitVector> createConsumer(BitVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableBitConsumer(vector, index);
+ } else {
+ return new NonNullableBitConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Nullable consumer for {@link BitVector}.
+ */
+ static class NullableBitConsumer extends BaseConsumer<BitVector> {
+
+ /**
+ * Instantiate a BitConsumer.
+ */
+ public NullableBitConsumer(BitVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ boolean value = resultSet.getBoolean(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value ? 1 : 0);
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for {@link BitVector}.
+ */
+ static class NonNullableBitConsumer extends BaseConsumer<BitVector> {
+
+ /**
+ * Instantiate a BitConsumer.
+ */
+ public NonNullableBitConsumer(BitVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ boolean value = resultSet.getBoolean(columnIndexInResultSet);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value ? 1 : 0);
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java
new file mode 100644
index 000000000..e57ecdf91
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.io.IOException;
+import java.sql.Blob;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.VarBinaryVector;
+
+/**
+ * Consumer which consume blob type values from {@link ResultSet}.
+ * Write the data to {@link VarBinaryVector}.
+ */
+public class BlobConsumer extends BaseConsumer<VarBinaryVector> {
+
+ private BinaryConsumer delegate;
+
+ private final boolean nullable;
+
+ /**
+ * Creates a consumer for {@link VarBinaryVector}.
+ */
+ public static BlobConsumer createConsumer(
+ BinaryConsumer delegate, int index, boolean nullable) {
+ return new BlobConsumer(delegate, index, nullable);
+ }
+
+ /**
+ * Instantiate a BlobConsumer.
+ */
+ public BlobConsumer(BinaryConsumer delegate, int index, boolean nullable) {
+ super(null, index);
+ this.delegate = delegate;
+ this.nullable = nullable;
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException, IOException {
+ Blob blob = resultSet.getBlob(columnIndexInResultSet);
+ if (blob != null) {
+ delegate.consume(blob.getBinaryStream());
+ }
+ delegate.moveWriterPosition();
+ }
+
+ @Override
+ public void close() throws Exception {
+ delegate.close();
+ }
+
+ @Override
+ public void resetValueVector(VarBinaryVector vector) {
+ delegate = BinaryConsumer.createConsumer(vector, columnIndexInResultSet, nullable);
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java
new file mode 100644
index 000000000..4694f1c7e
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.nio.charset.StandardCharsets;
+import java.sql.Clob;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.VarCharVector;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Consumer which consume clob type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.VarCharVector}.
+ */
+public abstract class ClobConsumer extends BaseConsumer<VarCharVector> {
+
+ /**
+ * Creates a consumer for {@link VarCharVector}.
+ */
+ public static ClobConsumer createConsumer(VarCharVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableClobConsumer(vector, index);
+ } else {
+ return new NonNullableClobConsumer(vector, index);
+ }
+ }
+
+ private static final int BUFFER_SIZE = 256;
+
+ /**
+ * Instantiate a ClobConsumer.
+ */
+ public ClobConsumer(VarCharVector vector, int index) {
+ super(vector, index);
+ if (vector != null) {
+ vector.allocateNewSafe();
+ }
+ }
+
+ @Override
+ public void resetValueVector(VarCharVector vector) {
+ this.vector = vector;
+ this.vector.allocateNewSafe();
+ this.currentIndex = 0;
+ }
+
+ /**
+ * Nullable consumer for clob data.
+ */
+ static class NullableClobConsumer extends ClobConsumer {
+
+ /**
+ * Instantiate a ClobConsumer.
+ */
+ public NullableClobConsumer(VarCharVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Clob clob = resultSet.getClob(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ if (clob != null) {
+ long length = clob.length();
+
+ int read = 1;
+ int readSize = length < BUFFER_SIZE ? (int) length : BUFFER_SIZE;
+ int totalBytes = 0;
+
+ ArrowBuf dataBuffer = vector.getDataBuffer();
+ ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+ int startIndex = offsetBuffer.getInt(currentIndex * 4);
+ while (read <= length) {
+ String str = clob.getSubString(read, readSize);
+ byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
+
+ while ((dataBuffer.writerIndex() + bytes.length) > dataBuffer.capacity()) {
+ vector.reallocDataBuffer();
+ }
+ PlatformDependent.copyMemory(bytes, 0,
+ dataBuffer.memoryAddress() + startIndex + totalBytes, bytes.length);
+
+ totalBytes += bytes.length;
+ read += readSize;
+ }
+ offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes);
+ BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex);
+ vector.setLastSet(currentIndex);
+ }
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for clob data.
+ */
+ static class NonNullableClobConsumer extends ClobConsumer {
+
+ /**
+ * Instantiate a ClobConsumer.
+ */
+ public NonNullableClobConsumer(VarCharVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Clob clob = resultSet.getClob(columnIndexInResultSet);
+ if (clob != null) {
+ long length = clob.length();
+
+ int read = 1;
+ int readSize = length < BUFFER_SIZE ? (int) length : BUFFER_SIZE;
+ int totalBytes = 0;
+
+ ArrowBuf dataBuffer = vector.getDataBuffer();
+ ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+ int startIndex = offsetBuffer.getInt(currentIndex * 4);
+ while (read <= length) {
+ String str = clob.getSubString(read, readSize);
+ byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
+
+ while ((dataBuffer.writerIndex() + bytes.length) > dataBuffer.capacity()) {
+ vector.reallocDataBuffer();
+ }
+ PlatformDependent.copyMemory(bytes, 0,
+ dataBuffer.memoryAddress() + startIndex + totalBytes, bytes.length);
+
+ totalBytes += bytes.length;
+ read += readSize;
+ }
+ offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes);
+ BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex);
+ vector.setLastSet(currentIndex);
+ }
+
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java
new file mode 100644
index 000000000..99cca71b1
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.io.IOException;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+
+/**
+ * Composite consumer which hold all consumers.
+ * It manages the consume and cleanup process.
+ */
+public class CompositeJdbcConsumer implements JdbcConsumer {
+
+ private final JdbcConsumer[] consumers;
+
+ /**
+ * Construct an instance.
+ */
+ public CompositeJdbcConsumer(JdbcConsumer[] consumers) {
+ this.consumers = consumers;
+ }
+
+ @Override
+ public void consume(ResultSet rs) throws SQLException, IOException {
+ for (int i = 0; i < consumers.length; i++) {
+ consumers[i].consume(rs);
+ }
+ }
+
+ @Override
+ public void close() {
+
+ try {
+ // clean up
+ AutoCloseables.close(consumers);
+ } catch (Exception e) {
+ throw new RuntimeException("Error occurred while releasing resources.", e);
+ }
+
+ }
+
+ @Override
+ public void resetValueVector(ValueVector vector) {
+
+ }
+
+ /**
+ * Reset inner consumers through vectors in the vector schema root.
+ */
+ public void resetVectorSchemaRoot(VectorSchemaRoot root) {
+ assert root.getFieldVectors().size() == consumers.length;
+ for (int i = 0; i < consumers.length; i++) {
+ consumers[i].resetValueVector(root.getVector(i));
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java
new file mode 100644
index 000000000..b9b83dacc
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.Date;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.Calendar;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+
+/**
+ * Consumer which consume date type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.DateDayVector}.
+ */
+public class DateConsumer {
+
+ /**
+ * Creates a consumer for {@link DateMilliVector}.
+ */
+ public static JdbcConsumer<DateDayVector> createConsumer(
+ DateDayVector vector, int index, boolean nullable, Calendar calendar) {
+ if (nullable) {
+ return new NullableDateConsumer(vector, index, calendar);
+ } else {
+ return new NonNullableDateConsumer(vector, index, calendar);
+ }
+ }
+
+ /**
+ * Nullable consumer for date.
+ */
+ static class NullableDateConsumer extends BaseConsumer<DateDayVector> {
+
+ protected final Calendar calendar;
+
+ /**
+ * Instantiate a DateConsumer.
+ */
+ public NullableDateConsumer(DateDayVector vector, int index) {
+ this(vector, index, /* calendar */null);
+ }
+
+ /**
+ * Instantiate a DateConsumer.
+ */
+ public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) {
+ super(vector, index);
+ this.calendar = calendar;
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) :
+ resultSet.getDate(columnIndexInResultSet, calendar);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime())));
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for date.
+ */
+ static class NonNullableDateConsumer extends BaseConsumer<DateDayVector> {
+
+ protected final Calendar calendar;
+
+ /**
+ * Instantiate a DateConsumer.
+ */
+ public NonNullableDateConsumer(DateDayVector vector, int index) {
+ this(vector, index, /* calendar */null);
+ }
+
+ /**
+ * Instantiate a DateConsumer.
+ */
+ public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calendar) {
+ super(vector, index);
+ this.calendar = calendar;
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) :
+ resultSet.getDate(columnIndexInResultSet, calendar);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime())));
+ currentIndex++;
+ }
+ }
+}
+
+
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java
new file mode 100644
index 000000000..4498fdecc
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.math.BigDecimal;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.DecimalVector;
+
+/**
+ * Consumer which consume decimal type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.DecimalVector}.
+ */
+public class DecimalConsumer {
+
+ /**
+ * Creates a consumer for {@link DecimalVector}.
+ */
+ public static JdbcConsumer<DecimalVector> createConsumer(DecimalVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableDecimalConsumer(vector, index);
+ } else {
+ return new NonNullableDecimalConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Consumer for nullable decimal.
+ */
+ static class NullableDecimalConsumer extends BaseConsumer<DecimalVector> {
+
+ /**
+ * Instantiate a DecimalConsumer.
+ */
+ public NullableDecimalConsumer(DecimalVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Consumer for non-nullable decimal.
+ */
+ static class NonNullableDecimalConsumer extends BaseConsumer<DecimalVector> {
+
+ /**
+ * Instantiate a DecimalConsumer.
+ */
+ public NonNullableDecimalConsumer(DecimalVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java
new file mode 100644
index 000000000..e3db95d15
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.Float8Vector;
+
+/**
+ * Consumer which consume double type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.Float8Vector}.
+ */
+public class DoubleConsumer {
+
+ /**
+ * Creates a consumer for {@link Float8Vector}.
+ */
+ public static JdbcConsumer<Float8Vector> createConsumer(Float8Vector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableDoubleConsumer(vector, index);
+ } else {
+ return new NonNullableDoubleConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Nullable double consumer.
+ */
+ static class NullableDoubleConsumer extends BaseConsumer<Float8Vector> {
+
+ /**
+ * Instantiate a DoubleConsumer.
+ */
+ public NullableDoubleConsumer(Float8Vector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ double value = resultSet.getDouble(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable double consumer.
+ */
+ static class NonNullableDoubleConsumer extends BaseConsumer<Float8Vector> {
+
+ /**
+ * Instantiate a DoubleConsumer.
+ */
+ public NonNullableDoubleConsumer(Float8Vector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ double value = resultSet.getDouble(columnIndexInResultSet);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java
new file mode 100644
index 000000000..830348fe9
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.Float4Vector;
+
+/**
+ * Consumer which consume float type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.Float4Vector}.
+ */
+public class FloatConsumer {
+
+ /**
+ * Creates a consumer for {@link Float4Vector}.
+ */
+ public static JdbcConsumer<Float4Vector> createConsumer(Float4Vector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableFloatConsumer(vector, index);
+ } else {
+ return new NonNullableFloatConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Nullable float consumer.
+ */
+ static class NullableFloatConsumer extends BaseConsumer<Float4Vector> {
+
+ /**
+ * Instantiate a FloatConsumer.
+ */
+ public NullableFloatConsumer(Float4Vector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ float value = resultSet.getFloat(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable float consumer.
+ */
+ static class NonNullableFloatConsumer extends BaseConsumer<Float4Vector> {
+
+ /**
+ * Instantiate a FloatConsumer.
+ */
+ public NonNullableFloatConsumer(Float4Vector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ float value = resultSet.getFloat(columnIndexInResultSet);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java
new file mode 100644
index 000000000..4e537d682
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.IntVector;
+
+/**
+ * Consumer which consume int type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.IntVector}.
+ */
+public class IntConsumer {
+
+ /**
+ * Creates a consumer for {@link IntVector}.
+ */
+ public static JdbcConsumer<IntVector> createConsumer(IntVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableIntConsumer(vector, index);
+ } else {
+ return new NonNullableIntConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Nullable consumer for int.
+ */
+ static class NullableIntConsumer extends BaseConsumer<IntVector> {
+
+ /**
+ * Instantiate a IntConsumer.
+ */
+ public NullableIntConsumer(IntVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ int value = resultSet.getInt(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for int.
+ */
+ static class NonNullableIntConsumer extends BaseConsumer<IntVector> {
+
+ /**
+ * Instantiate a IntConsumer.
+ */
+ public NonNullableIntConsumer(IntVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ int value = resultSet.getInt(columnIndexInResultSet);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java
new file mode 100644
index 000000000..480dfe3a1
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.io.IOException;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * An abstraction that is used to consume values from {@link ResultSet}.
+ * @param <T> The vector within consumer or its delegate, used for partially consume purpose.
+ */
+public interface JdbcConsumer<T extends ValueVector> extends AutoCloseable {
+
+ /**
+ * Consume a specific type value from {@link ResultSet} and write it to vector.
+ */
+ void consume(ResultSet resultSet) throws SQLException, IOException;
+
+ /**
+ * Close this consumer, do some clean work such as clear reuse ArrowBuf.
+ */
+ void close() throws Exception;
+
+ /**
+ * Reset the vector within consumer for partial read purpose.
+ */
+ void resetValueVector(T vector);
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java
new file mode 100644
index 000000000..a79a029f4
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.NullVector;
+
+/**
+ * Consumer which consume null type values from ResultSet.
+ * Corresponding to {@link org.apache.arrow.vector.NullVector}.
+ */
+public class NullConsumer extends BaseConsumer<NullVector> {
+
+ public NullConsumer(NullVector vector) {
+ super(vector, 0);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java
new file mode 100644
index 000000000..2edb3605b
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.SmallIntVector;
+
+/**
+ * Consumer which consume smallInt type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.SmallIntVector}.
+ */
+public class SmallIntConsumer {
+
+ /**
+ * Creates a consumer for {@link SmallIntVector}.
+ */
+ public static BaseConsumer<SmallIntVector> createConsumer(SmallIntVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableSmallIntConsumer(vector, index);
+ } else {
+ return new NonNullableSmallIntConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Nullable consumer for small int.
+ */
+ static class NullableSmallIntConsumer extends BaseConsumer<SmallIntVector> {
+
+ /**
+ * Instantiate a SmallIntConsumer.
+ */
+ public NullableSmallIntConsumer(SmallIntVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ short value = resultSet.getShort(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for small int.
+ */
+ static class NonNullableSmallIntConsumer extends BaseConsumer<SmallIntVector> {
+
+ /**
+ * Instantiate a SmallIntConsumer.
+ */
+ public NonNullableSmallIntConsumer(SmallIntVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ short value = resultSet.getShort(columnIndexInResultSet);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java
new file mode 100644
index 000000000..4fa15ad79
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Time;
+import java.util.Calendar;
+
+import org.apache.arrow.vector.TimeMilliVector;
+
+/**
+ * Consumer which consume time type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.TimeMilliVector}.
+ */
+public abstract class TimeConsumer {
+
+ /**
+ * Creates a consumer for {@link TimeMilliVector}.
+ */
+ public static JdbcConsumer<TimeMilliVector> createConsumer(
+ TimeMilliVector vector, int index, boolean nullable, Calendar calendar) {
+ if (nullable) {
+ return new NullableTimeConsumer(vector, index, calendar);
+ } else {
+ return new NonNullableTimeConsumer(vector, index, calendar);
+ }
+ }
+
+ /**
+ * Nullable consumer for {@link TimeMilliVector}.
+ */
+ static class NullableTimeConsumer extends BaseConsumer<TimeMilliVector> {
+
+ protected final Calendar calendar;
+
+ /**
+ * Instantiate a TimeConsumer.
+ */
+ public NullableTimeConsumer(TimeMilliVector vector, int index) {
+ this(vector, index, /* calendar */null);
+ }
+
+ /**
+ * Instantiate a TimeConsumer.
+ */
+ public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) {
+ super(vector, index);
+ this.calendar = calendar;
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) :
+ resultSet.getTime(columnIndexInResultSet, calendar);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, (int) time.getTime());
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for {@link TimeMilliVector}.
+ */
+ static class NonNullableTimeConsumer extends BaseConsumer<TimeMilliVector> {
+
+ protected final Calendar calendar;
+
+ /**
+ * Instantiate a TimeConsumer.
+ */
+ public NonNullableTimeConsumer(TimeMilliVector vector, int index) {
+ this(vector, index, /* calendar */null);
+ }
+
+ /**
+ * Instantiate a TimeConsumer.
+ */
+ public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) {
+ super(vector, index);
+ this.calendar = calendar;
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) :
+ resultSet.getTime(columnIndexInResultSet, calendar);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, (int) time.getTime());
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java
new file mode 100644
index 000000000..3351e7e78
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+
+import org.apache.arrow.vector.TimeStampMilliVector;
+
+/**
+ * Consumer which consume timestamp type values from {@link ResultSet}.
+ * Write the data to {@link TimeStampMilliVector}.
+ */
+public abstract class TimestampConsumer {
+
+ /**
+ * Creates a consumer for {@link TimeStampMilliVector}.
+ */
+ public static JdbcConsumer<TimeStampMilliVector> createConsumer(
+ TimeStampMilliVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableTimestampConsumer(vector, index);
+ } else {
+ return new NonNullableTimestampConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Nullable consumer for timestamp.
+ */
+ static class NullableTimestampConsumer extends BaseConsumer<TimeStampMilliVector> {
+
+ /**
+ * Instantiate a TimestampConsumer.
+ */
+ public NullableTimestampConsumer(TimeStampMilliVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, timestamp.getTime());
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for timestamp.
+ */
+ static class NonNullableTimestampConsumer extends BaseConsumer<TimeStampMilliVector> {
+
+ /**
+ * Instantiate a TimestampConsumer.
+ */
+ public NonNullableTimestampConsumer(TimeStampMilliVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, timestamp.getTime());
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java
new file mode 100644
index 000000000..f08671f0b
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Calendar;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+
+/**
+ * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}.
+ * Write the data to {@link TimeStampMilliTZVector}.
+ */
+public class TimestampTZConsumer {
+ /**
+ * Creates a consumer for {@link TimeStampMilliTZVector}.
+ */
+ public static JdbcConsumer<TimeStampMilliTZVector> createConsumer(
+ TimeStampMilliTZVector vector, int index, boolean nullable, Calendar calendar) {
+ Preconditions.checkArgument(calendar != null, "Calendar cannot be null");
+ if (nullable) {
+ return new TimestampTZConsumer.NullableTimestampTZConsumer(vector, index, calendar);
+ } else {
+ return new TimestampTZConsumer.NonNullableTimestampConsumer(vector, index, calendar);
+ }
+ }
+
+ /**
+ * Nullable consumer for timestamp (with time zone).
+ */
+ static class NullableTimestampTZConsumer extends BaseConsumer<TimeStampMilliTZVector> {
+
+ protected final Calendar calendar;
+
+ /**
+ * Instantiate a TimestampConsumer.
+ */
+ public NullableTimestampTZConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) {
+ super(vector, index);
+ this.calendar = calendar;
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet, calendar);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, timestamp.getTime());
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for timestamp (with time zone).
+ */
+ static class NonNullableTimestampConsumer extends BaseConsumer<TimeStampMilliTZVector> {
+
+ protected final Calendar calendar;
+
+ /**
+ * Instantiate a TimestampConsumer.
+ */
+ public NonNullableTimestampConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) {
+ super(vector, index);
+ this.calendar = calendar;
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet, calendar);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, timestamp.getTime());
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java
new file mode 100644
index 000000000..40cf087a5
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.TinyIntVector;
+
+/**
+ * Consumer which consume tinyInt type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.TinyIntVector}.
+ */
+public abstract class TinyIntConsumer {
+
+ /**
+ * Creates a consumer for {@link TinyIntVector}.
+ */
+ public static JdbcConsumer<TinyIntVector> createConsumer(TinyIntVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableTinyIntConsumer(vector, index);
+ } else {
+ return new NonNullableTinyIntConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Nullable consumer for tiny int.
+ */
+ static class NullableTinyIntConsumer extends BaseConsumer<TinyIntVector> {
+
+ /**
+ * Instantiate a TinyIntConsumer.
+ */
+ public NullableTinyIntConsumer(TinyIntVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ byte value = resultSet.getByte(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for tiny int.
+ */
+ static class NonNullableTinyIntConsumer extends BaseConsumer<TinyIntVector> {
+
+ /**
+ * Instantiate a TinyIntConsumer.
+ */
+ public NonNullableTinyIntConsumer(TinyIntVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ byte value = resultSet.getByte(columnIndexInResultSet);
+ // for fixed width vectors, we have allocated enough memory proactively,
+ // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value);
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java
new file mode 100644
index 000000000..05333715b
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import java.nio.charset.StandardCharsets;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.arrow.vector.VarCharVector;
+
+/**
+ * Consumer which consume varchar type values from {@link ResultSet}.
+ * Write the data to {@link org.apache.arrow.vector.VarCharVector}.
+ */
+public abstract class VarCharConsumer {
+
+ /**
+ * Creates a consumer for {@link VarCharVector}.
+ */
+ public static JdbcConsumer<VarCharVector> createConsumer(VarCharVector vector, int index, boolean nullable) {
+ if (nullable) {
+ return new NullableVarCharConsumer(vector, index);
+ } else {
+ return new NonNullableVarCharConsumer(vector, index);
+ }
+ }
+
+ /**
+ * Nullable consumer for var char.
+ */
+ static class NullableVarCharConsumer extends BaseConsumer<VarCharVector> {
+
+ /**
+ * Instantiate a VarCharConsumer.
+ */
+ public NullableVarCharConsumer(VarCharVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ String value = resultSet.getString(columnIndexInResultSet);
+ if (!resultSet.wasNull()) {
+ byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
+ vector.setSafe(currentIndex, bytes);
+ }
+ currentIndex++;
+ }
+ }
+
+ /**
+ * Non-nullable consumer for var char.
+ */
+ static class NonNullableVarCharConsumer extends BaseConsumer<VarCharVector> {
+
+ /**
+ * Instantiate a VarCharConsumer.
+ */
+ public NonNullableVarCharConsumer(VarCharVector vector, int index) {
+ super(vector, index);
+ }
+
+ @Override
+ public void consume(ResultSet resultSet) throws SQLException {
+ String value = resultSet.getString(columnIndexInResultSet);
+ byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
+ vector.setSafe(currentIndex, bytes);
+ currentIndex++;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java
new file mode 100644
index 000000000..c3cd4574b
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Calendar;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+
+/**
+ * Class to abstract out some common test functionality for testing JDBC to Arrow.
+ */
+public abstract class AbstractJdbcToArrowTest {
+
+ protected static final String BIGINT = "BIGINT_FIELD5";
+ protected static final String BINARY = "BINARY_FIELD12";
+ protected static final String BIT = "BIT_FIELD17";
+ protected static final String BLOB = "BLOB_FIELD14";
+ protected static final String BOOL = "BOOL_FIELD2";
+ protected static final String CHAR = "CHAR_FIELD16";
+ protected static final String CLOB = "CLOB_FIELD15";
+ protected static final String DATE = "DATE_FIELD10";
+ protected static final String DECIMAL = "DECIMAL_FIELD6";
+ protected static final String DOUBLE = "DOUBLE_FIELD7";
+ protected static final String INT = "INT_FIELD1";
+ protected static final String REAL = "REAL_FIELD8";
+ protected static final String SMALLINT = "SMALLINT_FIELD4";
+ protected static final String TIME = "TIME_FIELD9";
+ protected static final String TIMESTAMP = "TIMESTAMP_FIELD11";
+ protected static final String TINYINT = "TINYINT_FIELD3";
+ protected static final String VARCHAR = "VARCHAR_FIELD13";
+ protected static final String NULL = "NULL_FIELD18";
+
+ protected Connection conn = null;
+ protected Table table;
+ protected boolean reuseVectorSchemaRoot;
+
+ /**
+ * This method creates Table object after reading YAML file.
+ *
+ * @param ymlFilePath path to file
+ * @return Table object
+ * @throws IOException on error
+ */
+ protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) throws IOException {
+ return new ObjectMapper(new YAMLFactory()).readValue(
+ clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class);
+ }
+
+
+ /**
+ * This method creates Connection object and DB table and also populate data into table for test.
+ *
+ * @throws SQLException on error
+ * @throws ClassNotFoundException on error
+ */
+ @Before
+ public void setUp() throws SQLException, ClassNotFoundException {
+ String url = "jdbc:h2:mem:JdbcToArrowTest";
+ String driver = "org.h2.Driver";
+ Class.forName(driver);
+ conn = DriverManager.getConnection(url);
+ try (Statement stmt = conn.createStatement();) {
+ stmt.executeUpdate(table.getCreate());
+ for (String insert : table.getData()) {
+ stmt.executeUpdate(insert);
+ }
+ }
+ }
+
+ /**
+ * Clean up method to close connection after test completes.
+ *
+ * @throws SQLException on error
+ */
+ @After
+ public void destroy() throws SQLException {
+ if (conn != null) {
+ conn.close();
+ conn = null;
+ }
+ }
+
+ /**
+ * Prepares test data and returns collection of Table object for each test iteration.
+ *
+ * @param testFiles files for test
+ * @param clss Class type
+ * @return Collection of Table objects
+ * @throws SQLException on error
+ * @throws ClassNotFoundException on error
+ * @throws IOException on error
+ */
+ public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings("rawtypes") Class clss)
+ throws SQLException, ClassNotFoundException, IOException {
+ Object[][] tableArr = new Object[testFiles.length][];
+ int i = 0;
+ for (String testFile : testFiles) {
+ tableArr[i++] = new Object[]{getTable(testFile, clss)};
+ }
+ return tableArr;
+ }
+
+ /**
+ * Abstract method to implement test Functionality to test JdbcToArrow methods.
+ *
+ * @throws SQLException on error
+ * @throws IOException on error
+ */
+ @Test
+ public abstract void testJdbcToArrowValues() throws SQLException, IOException;
+
+ /**
+ * Abstract method to implement logic to assert test various datatype values.
+ *
+ * @param root VectorSchemaRoot for test
+ */
+ public abstract void testDataSets(VectorSchemaRoot root);
+
+ /**
+ * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
+ * This method uses the default Calendar instance with default TimeZone and Locale as returned by the JVM.
+ * If you wish to use specific TimeZone or Locale for any Date, Time and Timestamp datasets, you may want use
+ * overloaded API that taken Calendar object instance.
+ *
+ * This method is for test only.
+ *
+ * @param connection Database connection to be used. This method will not close the passed connection object. Since
+ * the caller has passed the connection object it's the responsibility of the caller to close or
+ * return the connection to the pool.
+ * @param query The DB Query to fetch the data.
+ * @param allocator Memory allocator
+ * @return Arrow Data Objects {@link VectorSchemaRoot}
+ * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
+ * ResultSet and Statement objects.
+ */
+ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, BufferAllocator allocator)
+ throws SQLException, IOException {
+ Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
+
+ JdbcToArrowConfig config =
+ new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
+ return sqlToArrow(connection, query, config);
+ }
+
+ /**
+ * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
+ *
+ * This method is for test only.
+ *
+ * @param connection Database connection to be used. This method will not close the passed connection object. Since
+ * the caller has passed the connection object it's the responsibility of the caller to close or
+ * return the connection to the pool.
+ * @param query The DB Query to fetch the data.
+ * @param allocator Memory allocator
+ * @param calendar Calendar object to use to handle Date, Time and Timestamp datasets.
+ * @return Arrow Data Objects {@link VectorSchemaRoot}
+ * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
+ * ResultSet and Statement objects.
+ */
+ public static VectorSchemaRoot sqlToArrow(
+ Connection connection,
+ String query,
+ BufferAllocator allocator,
+ Calendar calendar) throws SQLException, IOException {
+
+ Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
+ Preconditions.checkNotNull(calendar, "Calendar object can not be null");
+
+ return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar));
+ }
+
+ /**
+ * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
+ *
+ * This method is for test only.
+ *
+ * @param connection Database connection to be used. This method will not close the passed connection object.
+ * Since the caller has passed the connection object it's the responsibility of the caller
+ * to close or return the connection to the pool.
+ * @param query The DB Query to fetch the data.
+ * @param config Configuration
+ * @return Arrow Data Objects {@link VectorSchemaRoot}
+ * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
+ * ResultSet and Statement objects.
+ */
+ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config)
+ throws SQLException, IOException {
+ Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
+ Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
+
+ try (Statement stmt = connection.createStatement()) {
+ return sqlToArrow(stmt.executeQuery(query), config);
+ }
+ }
+
+ /**
+ * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. This
+ * method uses the default RootAllocator and Calendar object.
+ *
+ * This method is for test only.
+ *
+ * @param resultSet ResultSet to use to fetch the data from underlying database
+ * @return Arrow Data Objects {@link VectorSchemaRoot}
+ * @throws SQLException on error
+ */
+ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLException, IOException {
+ Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
+
+ return sqlToArrow(resultSet, JdbcToArrowUtils.getUtcCalendar());
+ }
+
+ /**
+ * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+ *
+ * This method is for test only.
+ *
+ * @param resultSet ResultSet to use to fetch the data from underlying database
+ * @param allocator Memory allocator
+ * @return Arrow Data Objects {@link VectorSchemaRoot}
+ * @throws SQLException on error
+ */
+ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator allocator)
+ throws SQLException, IOException {
+ Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
+
+ JdbcToArrowConfig config =
+ new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
+ return sqlToArrow(resultSet, config);
+ }
+
+ /**
+ * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+ *
+ * This method is for test only.
+ *
+ * @param resultSet ResultSet to use to fetch the data from underlying database
+ * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or <code>null</code> if none.
+ * @return Arrow Data Objects {@link VectorSchemaRoot}
+ * @throws SQLException on error
+ */
+ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
+ Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
+ return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar));
+ }
+
+ /**
+ * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+ *
+ * This method is for test only.
+ *
+ * @param resultSet ResultSet to use to fetch the data from underlying database
+ * @param allocator Memory allocator to use.
+ * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or <code>null</code> if none.
+ * @return Arrow Data Objects {@link VectorSchemaRoot}
+ * @throws SQLException on error
+ */
+ public static VectorSchemaRoot sqlToArrow(
+ ResultSet resultSet,
+ BufferAllocator allocator,
+ Calendar calendar)
+ throws SQLException, IOException {
+ Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
+
+ return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar));
+ }
+
+ /**
+ * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
+ *
+ * This method is for test only.
+ *
+ * @param resultSet ResultSet to use to fetch the data from underlying database
+ * @param config Configuration of the conversion from JDBC to Arrow.
+ * @return Arrow Data Objects {@link VectorSchemaRoot}
+ * @throws SQLException on error
+ */
+ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config)
+ throws SQLException, IOException {
+ Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
+ Preconditions.checkNotNull(config, "The configuration cannot be null");
+
+ VectorSchemaRoot root = VectorSchemaRoot.create(
+ JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator());
+ if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) {
+ ValueVectorUtility.preAllocate(root, config.getTargetBatchSize());
+ }
+ JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, config);
+ return root;
+ }
+
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java
new file mode 100644
index 000000000..b1a8b8f22
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import static org.junit.Assert.*;
+
+import java.sql.Types;
+
+import org.junit.Test;
+
+public class JdbcFieldInfoTest {
+
+ @Test
+ public void testCreateJdbcFieldInfoWithJdbcType() {
+ JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB);
+
+ assertEquals(Types.BLOB, fieldInfo.getJdbcType());
+ assertEquals(0, fieldInfo.getPrecision());
+ assertEquals(0, fieldInfo.getScale());
+ }
+
+ @Test
+ public void testCreateJdbcFieldInfoWithJdbcTypePrecisionAndScale() {
+ JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB, 1, 2);
+
+ assertEquals(Types.BLOB, fieldInfo.getJdbcType());
+ assertEquals(1, fieldInfo.getPrecision());
+ assertEquals(2, fieldInfo.getScale());
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
new file mode 100644
index 000000000..68a681b05
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.sql.Types;
+import java.util.Calendar;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.TimeZone;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Test;
+
+public class JdbcToArrowConfigTest {
+
+ private static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ private static final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
+
+ @Test(expected = NullPointerException.class)
+ public void testConfigNullArguments() {
+ new JdbcToArrowConfig(null, null);
+ }
+
+ @Test(expected = NullPointerException.class)
+ public void testBuilderNullArguments() {
+ new JdbcToArrowConfigBuilder(null, null);
+ }
+
+ @Test
+ public void testConfigNullCalendar() {
+ JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null);
+ assertNull(config.getCalendar());
+ }
+
+ @Test
+ public void testBuilderNullCalendar() {
+ JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, null);
+ JdbcToArrowConfig config = builder.build();
+ assertNull(config.getCalendar());
+ }
+
+ @Test(expected = NullPointerException.class)
+ public void testConfigNullAllocator() {
+ new JdbcToArrowConfig(null, calendar);
+ }
+
+ @Test(expected = NullPointerException.class)
+ public void testBuilderNullAllocator() {
+ new JdbcToArrowConfigBuilder(null, calendar);
+ }
+
+ @Test(expected = NullPointerException.class)
+ public void testSetNullAllocator() {
+ JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar);
+ builder.setAllocator(null);
+ }
+
+ @Test
+ public void testSetNullCalendar() {
+ JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar);
+ JdbcToArrowConfig config = builder.setCalendar(null).build();
+ assertNull(config.getCalendar());
+ }
+
+ @Test
+ public void testConfig() {
+ JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar);
+ JdbcToArrowConfig config = builder.build();
+
+ assertTrue(allocator == config.getAllocator());
+ assertTrue(calendar == config.getCalendar());
+
+ Calendar newCalendar = Calendar.getInstance();
+ BufferAllocator newAllocator = new RootAllocator(Integer.SIZE);
+
+ builder.setAllocator(newAllocator).setCalendar(newCalendar);
+ config = builder.build();
+
+ assertTrue(newAllocator == config.getAllocator());
+ assertTrue(newCalendar == config.getCalendar());
+ }
+
+ @Test
+ public void testIncludeMetadata() {
+ JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false);
+
+ JdbcToArrowConfig config = builder.build();
+ assertFalse(config.shouldIncludeMetadata());
+
+ builder.setIncludeMetadata(true);
+ config = builder.build();
+ assertTrue(config.shouldIncludeMetadata());
+
+ config = new JdbcToArrowConfigBuilder(allocator, calendar, true).build();
+ assertTrue(config.shouldIncludeMetadata());
+
+ config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ true,
+ /* reuse vector schema root */ true, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null);
+ assertTrue(config.shouldIncludeMetadata());
+ assertTrue(config.isReuseVectorSchemaRoot());
+
+ config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ false,
+ /* reuse vector schema root */ false, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null);
+ assertFalse(config.shouldIncludeMetadata());
+ assertFalse(config.isReuseVectorSchemaRoot());
+ }
+
+ @Test
+ public void testArraySubTypes() {
+ JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false);
+ JdbcToArrowConfig config = builder.build();
+
+ final int columnIndex = 1;
+ final String columnName = "COLUMN";
+
+ assertNull(config.getArraySubTypeByColumnIndex(columnIndex));
+ assertNull(config.getArraySubTypeByColumnName(columnName));
+
+ final HashMap<Integer, JdbcFieldInfo> indexMapping = new HashMap<Integer, JdbcFieldInfo>();
+ indexMapping.put(2, new JdbcFieldInfo(Types.BIGINT));
+
+ final HashMap<String, JdbcFieldInfo> fieldMapping = new HashMap<String, JdbcFieldInfo>();
+ fieldMapping.put("NEW_COLUMN", new JdbcFieldInfo(Types.BINARY));
+
+ builder.setArraySubTypeByColumnIndexMap(indexMapping);
+ builder.setArraySubTypeByColumnNameMap(fieldMapping);
+ config = builder.build();
+
+ assertNull(config.getArraySubTypeByColumnIndex(columnIndex));
+ assertNull(config.getArraySubTypeByColumnName(columnName));
+
+ indexMapping.put(columnIndex, new JdbcFieldInfo(Types.BIT));
+ fieldMapping.put(columnName, new JdbcFieldInfo(Types.BLOB));
+
+ assertNotNull(config.getArraySubTypeByColumnIndex(columnIndex));
+ assertEquals(Types.BIT, config.getArraySubTypeByColumnIndex(columnIndex).getJdbcType());
+ assertEquals(Types.BLOB, config.getArraySubTypeByColumnName(columnName).getJdbcType());
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
new file mode 100644
index 000000000..9fdb32d80
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
@@ -0,0 +1,381 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.math.BigDecimal;
+import java.nio.charset.Charset;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * This is a Helper class which has functionalities to read and assert the values from the given FieldVector object.
+ */
+public class JdbcToArrowTestHelper {
+
+ public static void assertIntVectorValues(IntVector intVector, int rowCount, Integer[] values) {
+ assertEquals(rowCount, intVector.getValueCount());
+
+ for (int j = 0; j < intVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(intVector.isNull(j));
+ } else {
+ assertEquals(values[j].intValue(), intVector.get(j));
+ }
+ }
+ }
+
+ public static void assertBooleanVectorValues(BitVector bitVector, int rowCount, Boolean[] values) {
+ assertEquals(rowCount, bitVector.getValueCount());
+
+ for (int j = 0; j < bitVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(bitVector.isNull(j));
+ } else {
+ assertEquals(values[j].booleanValue(), bitVector.get(j) == 1);
+ }
+ }
+ }
+
+ public static void assertBitVectorValues(BitVector bitVector, int rowCount, Integer[] values) {
+ assertEquals(rowCount, bitVector.getValueCount());
+
+ for (int j = 0; j < bitVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(bitVector.isNull(j));
+ } else {
+ assertEquals(values[j].intValue(), bitVector.get(j));
+ }
+ }
+ }
+
+ public static void assertTinyIntVectorValues(TinyIntVector tinyIntVector, int rowCount, Integer[] values) {
+ assertEquals(rowCount, tinyIntVector.getValueCount());
+
+ for (int j = 0; j < tinyIntVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(tinyIntVector.isNull(j));
+ } else {
+ assertEquals(values[j].intValue(), tinyIntVector.get(j));
+ }
+ }
+ }
+
+ public static void assertSmallIntVectorValues(SmallIntVector smallIntVector, int rowCount, Integer[] values) {
+ assertEquals(rowCount, smallIntVector.getValueCount());
+
+ for (int j = 0; j < smallIntVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(smallIntVector.isNull(j));
+ } else {
+ assertEquals(values[j].intValue(), smallIntVector.get(j));
+ }
+ }
+ }
+
+ public static void assertBigIntVectorValues(BigIntVector bigIntVector, int rowCount, Long[] values) {
+ assertEquals(rowCount, bigIntVector.getValueCount());
+
+ for (int j = 0; j < bigIntVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(bigIntVector.isNull(j));
+ } else {
+ assertEquals(values[j].longValue(), bigIntVector.get(j));
+ }
+ }
+ }
+
+ public static void assertDecimalVectorValues(DecimalVector decimalVector, int rowCount, BigDecimal[] values) {
+ assertEquals(rowCount, decimalVector.getValueCount());
+
+ for (int j = 0; j < decimalVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(decimalVector.isNull(j));
+ } else {
+ assertEquals(values[j].doubleValue(), decimalVector.getObject(j).doubleValue(), 0);
+ }
+ }
+ }
+
+ public static void assertFloat8VectorValues(Float8Vector float8Vector, int rowCount, Double[] values) {
+ assertEquals(rowCount, float8Vector.getValueCount());
+
+ for (int j = 0; j < float8Vector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(float8Vector.isNull(j));
+ } else {
+ assertEquals(values[j], float8Vector.get(j), 0.01);
+ }
+ }
+ }
+
+ public static void assertFloat4VectorValues(Float4Vector float4Vector, int rowCount, Float[] values) {
+ assertEquals(rowCount, float4Vector.getValueCount());
+
+ for (int j = 0; j < float4Vector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(float4Vector.isNull(j));
+ } else {
+ assertEquals(values[j], float4Vector.get(j), 0.01);
+ }
+ }
+ }
+
+ public static void assertTimeVectorValues(TimeMilliVector timeMilliVector, int rowCount, Long[] values) {
+ assertEquals(rowCount, timeMilliVector.getValueCount());
+
+ for (int j = 0; j < timeMilliVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(timeMilliVector.isNull(j));
+ } else {
+ assertEquals(values[j].longValue(), timeMilliVector.get(j));
+ }
+ }
+ }
+
+ public static void assertDateVectorValues(DateDayVector dateDayVector, int rowCount, Integer[] values) {
+ assertEquals(rowCount, dateDayVector.getValueCount());
+
+ for (int j = 0; j < dateDayVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(dateDayVector.isNull(j));
+ } else {
+ assertEquals(values[j].longValue(), dateDayVector.get(j));
+ }
+ }
+ }
+
+ public static void assertTimeStampVectorValues(TimeStampVector timeStampVector, int rowCount, Long[] values) {
+ assertEquals(rowCount, timeStampVector.getValueCount());
+
+ for (int j = 0; j < timeStampVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(timeStampVector.isNull(j));
+ } else {
+ assertEquals(values[j].longValue(), timeStampVector.get(j));
+ }
+ }
+ }
+
+ public static void assertVarBinaryVectorValues(VarBinaryVector varBinaryVector, int rowCount, byte[][] values) {
+ assertEquals(rowCount, varBinaryVector.getValueCount());
+
+ for (int j = 0; j < varBinaryVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(varBinaryVector.isNull(j));
+ } else {
+ assertArrayEquals(values[j], varBinaryVector.get(j));
+ }
+ }
+ }
+
+ public static void assertVarcharVectorValues(VarCharVector varCharVector, int rowCount, byte[][] values) {
+ assertEquals(rowCount, varCharVector.getValueCount());
+
+ for (int j = 0; j < varCharVector.getValueCount(); j++) {
+ if (values[j] == null) {
+ assertTrue(varCharVector.isNull(j));
+ } else {
+ assertArrayEquals(values[j], varCharVector.get(j));
+ }
+ }
+ }
+
+ public static void assertNullVectorValues(NullVector vector, int rowCount) {
+ assertEquals(rowCount, vector.getValueCount());
+ }
+
+ public static void assertNullValues(BaseValueVector vector, int rowCount) {
+ assertEquals(rowCount, vector.getValueCount());
+
+ for (int j = 0; j < vector.getValueCount(); j++) {
+ assertTrue(vector.isNull(j));
+ }
+ }
+
+ public static void assertFieldMetadataIsEmpty(VectorSchemaRoot schema) {
+ assertNotNull(schema);
+ assertNotNull(schema.getSchema());
+ assertNotNull(schema.getSchema().getFields());
+
+ for (Field field : schema.getSchema().getFields()) {
+ assertNotNull(field.getMetadata());
+ assertEquals(0, field.getMetadata().size());
+ }
+ }
+
+ public static void assertFieldMetadataMatchesResultSetMetadata(ResultSetMetaData rsmd, Schema schema)
+ throws SQLException {
+ assertNotNull(schema);
+ assertNotNull(schema.getFields());
+ assertNotNull(rsmd);
+
+ List<Field> fields = schema.getFields();
+
+ assertEquals(rsmd.getColumnCount(), fields.size());
+
+ // Vector columns are created in the same order as ResultSet columns.
+ for (int i = 1; i <= rsmd.getColumnCount(); ++i) {
+ Map<String, String> metadata = fields.get(i - 1).getMetadata();
+
+ assertNotNull(metadata);
+ assertEquals(4, metadata.size());
+
+ assertEquals(rsmd.getCatalogName(i), metadata.get(Constants.SQL_CATALOG_NAME_KEY));
+ assertEquals(rsmd.getTableName(i), metadata.get(Constants.SQL_TABLE_NAME_KEY));
+ assertEquals(rsmd.getColumnLabel(i), metadata.get(Constants.SQL_COLUMN_NAME_KEY));
+ assertEquals(rsmd.getColumnTypeName(i), metadata.get(Constants.SQL_TYPE_KEY));
+ }
+ }
+
+ public static byte[] hexStringToByteArray(String s) {
+ int len = s.length();
+ byte[] data = new byte[len / 2];
+ for (int i = 0; i < len; i += 2) {
+ data[i / 2] = (byte) ((Character.digit(s.charAt(i), 16) << 4) +
+ Character.digit(s.charAt(i + 1), 16));
+ }
+ return data;
+ }
+
+ public static Integer[] getIntValues(String[] values, String dataType) {
+ String[] dataArr = getValues(values, dataType);
+ Integer[] valueArr = new Integer[dataArr.length];
+ int i = 0;
+ for (String data : dataArr) {
+ valueArr[i++] = "null".equals(data.trim()) ? null : Integer.parseInt(data);
+ }
+ return valueArr;
+ }
+
+ public static Boolean[] getBooleanValues(String[] values, String dataType) {
+ String[] dataArr = getValues(values, dataType);
+ Boolean[] valueArr = new Boolean[dataArr.length];
+ int i = 0;
+ for (String data : dataArr) {
+ valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().equals("1");
+ }
+ return valueArr;
+ }
+
+ public static BigDecimal[] getDecimalValues(String[] values, String dataType) {
+ String[] dataArr = getValues(values, dataType);
+ BigDecimal[] valueArr = new BigDecimal[dataArr.length];
+ int i = 0;
+ for (String data : dataArr) {
+ valueArr[i++] = "null".equals(data.trim()) ? null : new BigDecimal(data);
+ }
+ return valueArr;
+ }
+
+ public static Double[] getDoubleValues(String[] values, String dataType) {
+ String[] dataArr = getValues(values, dataType);
+ Double[] valueArr = new Double[dataArr.length];
+ int i = 0;
+ for (String data : dataArr) {
+ valueArr[i++] = "null".equals(data.trim()) ? null : Double.parseDouble(data);
+ }
+ return valueArr;
+ }
+
+ public static Float[] getFloatValues(String[] values, String dataType) {
+ String[] dataArr = getValues(values, dataType);
+ Float[] valueArr = new Float[dataArr.length];
+ int i = 0;
+ for (String data : dataArr) {
+ valueArr[i++] = "null".equals(data.trim()) ? null : Float.parseFloat(data);
+ }
+ return valueArr;
+ }
+
+ public static Long[] getLongValues(String[] values, String dataType) {
+ String[] dataArr = getValues(values, dataType);
+ Long[] valueArr = new Long[dataArr.length];
+ int i = 0;
+ for (String data : dataArr) {
+ valueArr[i++] = "null".equals(data.trim()) ? null : Long.parseLong(data);
+ }
+ return valueArr;
+ }
+
+ public static byte[][] getCharArray(String[] values, String dataType) {
+ String[] dataArr = getValues(values, dataType);
+ byte[][] valueArr = new byte[dataArr.length][];
+ int i = 0;
+ for (String data : dataArr) {
+ valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes();
+ }
+ return valueArr;
+ }
+
+ public static byte[][] getCharArrayWithCharSet(String[] values, String dataType, Charset charSet) {
+ String[] dataArr = getValues(values, dataType);
+ byte[][] valueArr = new byte[dataArr.length][];
+ int i = 0;
+ for (String data : dataArr) {
+ valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(charSet);
+ }
+ return valueArr;
+ }
+
+ public static byte[][] getBinaryValues(String[] values, String dataType) {
+ String[] dataArr = getValues(values, dataType);
+ byte[][] valueArr = new byte[dataArr.length][];
+ int i = 0;
+ for (String data : dataArr) {
+ valueArr[i++] = "null".equals(data.trim()) ? null : hexStringToByteArray(data.trim());
+ }
+ return valueArr;
+ }
+
+ public static String[] getValues(String[] values, String dataType) {
+ String value = "";
+ for (String val : values) {
+ if (val.startsWith(dataType)) {
+ value = val.split("=")[1];
+ break;
+ }
+ }
+ return value.split(",");
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java
new file mode 100644
index 000000000..213716266
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * POJO to handle the YAML data from the test YAML file.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class Table {
+ private String name;
+ private String type;
+ private String vector;
+ private String timezone;
+ private String create;
+ private String[] data;
+ private String query;
+ private String drop;
+ private String[] values;
+ private String[] vectors;
+ private int rowCount;
+
+ public Table() {
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ public void setType(String type) {
+ this.type = type;
+ }
+
+ public String getVector() {
+ return vector;
+ }
+
+ public void setVector(String vector) {
+ this.vector = vector;
+ }
+
+ public String[] getValues() {
+ return values;
+ }
+
+ public void setValues(String[] values) {
+ this.values = values;
+ }
+
+ public Long[] getLongValues() {
+ Long[] arr = new Long[values.length];
+ int i = 0;
+ for (String str : values) {
+ arr[i++] = Long.parseLong(str);
+ }
+ return arr;
+ }
+
+ public Integer[] getIntValues() {
+ Integer[] arr = new Integer[values.length];
+ int i = 0;
+ for (String str : values) {
+ arr[i++] = Integer.parseInt(str);
+ }
+ return arr;
+ }
+
+ public Boolean[] getBoolValues() {
+ Boolean[] arr = new Boolean[values.length];
+ int i = 0;
+ for (String str : values) {
+ arr[i++] = Boolean.parseBoolean(str);
+ }
+ return arr;
+ }
+
+ public BigDecimal[] getBigDecimalValues() {
+ BigDecimal[] arr = new BigDecimal[values.length];
+ int i = 0;
+ for (String str : values) {
+ arr[i++] = new BigDecimal(str);
+ }
+ return arr;
+ }
+
+ public Double[] getDoubleValues() {
+ Double[] arr = new Double[values.length];
+ int i = 0;
+ for (String str : values) {
+ arr[i++] = Double.parseDouble(str);
+ }
+ return arr;
+ }
+
+ public Float[] getFloatValues() {
+ Float[] arr = new Float[values.length];
+ int i = 0;
+ for (String str : values) {
+ arr[i++] = Float.parseFloat(str);
+ }
+ return arr;
+ }
+
+ public byte[][] getBinaryValues() {
+ return getHexToByteArray(values);
+ }
+
+ public byte[][] getVarCharValues() {
+ return getByteArray(values);
+ }
+
+ public byte[][] getBlobValues() {
+ return getBinaryValues();
+ }
+
+ public byte[][] getClobValues() {
+ return getByteArray(values);
+ }
+
+ public byte[][] getCharValues() {
+ return getByteArray(values);
+ }
+
+ public String getCreate() {
+ return create;
+ }
+
+ public void setCreate(String create) {
+ this.create = create;
+ }
+
+ public String[] getData() {
+ return data;
+ }
+
+ public void setData(String[] data) {
+ this.data = data;
+ }
+
+ public String getQuery() {
+ return query;
+ }
+
+ public void setQuery(String query) {
+ this.query = query;
+ }
+
+ public String getDrop() {
+ return drop;
+ }
+
+ public void setDrop(String drop) {
+ this.drop = drop;
+ }
+
+ public String getTimezone() {
+ return timezone;
+ }
+
+ public void setTimezone(String timezone) {
+ this.timezone = timezone;
+ }
+
+ public String[] getVectors() {
+ return vectors;
+ }
+
+ public void setVectors(String[] vectors) {
+ this.vectors = vectors;
+ }
+
+ public int getRowCount() {
+ return rowCount;
+ }
+
+ public void setRowCount(int rowCount) {
+ this.rowCount = rowCount;
+ }
+
+ static byte[][] getByteArray(String[] data) {
+ byte[][] byteArr = new byte[data.length][];
+
+ for (int i = 0; i < data.length; i++) {
+ byteArr[i] = data[i].getBytes(StandardCharsets.UTF_8);
+ }
+ return byteArr;
+ }
+
+ static byte[][] getHexToByteArray(String[] data) {
+ byte[][] byteArr = new byte[data.length][];
+
+ for (int i = 0; i < data.length; i++) {
+ byteArr[i] = hexStringToByteArray(data[i]);
+ }
+ return byteArr;
+ }
+
+ static byte[] hexStringToByteArray(String s) {
+ int len = s.length();
+ byte[] data = new byte[len / 2];
+ for (int i = 0; i < len; i += 2) {
+ data[i / 2] = (byte) ((Character.digit(s.charAt(i), 16) << 4) +
+ Character.digit(s.charAt(i + 1), 16));
+ }
+ return data;
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java
new file mode 100644
index 000000000..96bac4221
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.After;
+import org.junit.Before;
+
+public abstract class AbstractConsumerTest {
+
+ protected BufferAllocator allocator;
+
+ @Before
+ public void setUp() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void tearDown() {
+ allocator.close();
+ }
+
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java
new file mode 100644
index 000000000..a368023d4
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.consumer;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.junit.Test;
+
+public class BinaryConsumerTest extends AbstractConsumerTest {
+
+ private static final int INITIAL_VALUE_ALLOCATION = BaseValueVector.INITIAL_VALUE_ALLOCATION;
+ private static final int DEFAULT_RECORD_BYTE_COUNT = 8;
+
+ interface InputStreamConsumer {
+ void consume(BinaryConsumer consumer) throws IOException;
+ }
+
+ protected void assertConsume(boolean nullable, InputStreamConsumer dataConsumer, byte[][] expect) throws IOException {
+ try (final VarBinaryVector vector = new VarBinaryVector("binary", allocator)) {
+ BinaryConsumer consumer = BinaryConsumer.createConsumer(vector, 0, nullable);
+ dataConsumer.consume(consumer);
+ assertEquals(expect.length - 1, vector.getLastSet());
+ for (int i = 0; i < expect.length; i++) {
+ byte[] value = expect[i];
+ if (value == null) {
+ assertTrue(vector.isNull(i));
+ } else {
+ assertArrayEquals(expect[i], vector.get(i));
+ }
+ }
+ }
+ }
+
+ private byte[] createBytes(int length) {
+ byte[] bytes = new byte[length];
+ for (int i = 0; i < length; i++) {
+ bytes[i] = (byte) (i % 1024);
+ }
+ return bytes;
+ }
+
+
+ public void testConsumeInputStream(byte[][] values, boolean nullable) throws IOException {
+ assertConsume(nullable, binaryConsumer -> {
+ for (byte[] value : values) {
+ binaryConsumer.consume(new ByteArrayInputStream(value));
+ binaryConsumer.moveWriterPosition();
+ }
+ }, values);
+ }
+
+ @Test
+ public void testConsumeInputStream() throws IOException {
+ testConsumeInputStream(new byte[][]{
+ createBytes(DEFAULT_RECORD_BYTE_COUNT)
+ }, false);
+
+ testConsumeInputStream(new byte[][]{
+ createBytes(DEFAULT_RECORD_BYTE_COUNT),
+ createBytes(DEFAULT_RECORD_BYTE_COUNT)
+ }, false);
+
+ testConsumeInputStream(new byte[][]{
+ createBytes(DEFAULT_RECORD_BYTE_COUNT * 2),
+ createBytes(DEFAULT_RECORD_BYTE_COUNT),
+ createBytes(DEFAULT_RECORD_BYTE_COUNT)
+ }, false);
+
+ testConsumeInputStream(new byte[][]{
+ createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT)
+ }, false);
+
+ testConsumeInputStream(new byte[][]{
+ createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT * 10),
+ }, false);
+
+ testConsumeInputStream(new byte[][]{
+ createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT),
+ createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT)
+ }, false);
+
+ testConsumeInputStream(new byte[][]{
+ createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT),
+ createBytes(DEFAULT_RECORD_BYTE_COUNT),
+ createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT)
+ }, false);
+
+ byte[][] testRecords = new byte[INITIAL_VALUE_ALLOCATION * 2][];
+ for (int i = 0; i < testRecords.length; i++) {
+ testRecords[i] = createBytes(DEFAULT_RECORD_BYTE_COUNT);
+ }
+ testConsumeInputStream(testRecords, false);
+ }
+
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java
new file mode 100644
index 000000000..a6e6b22fc
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow;
+import static org.junit.Assert.assertEquals;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.List;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class JdbcAliasToArrowTest {
+ private Connection conn = null;
+
+ private static final String CREATE_STATEMENT =
+ "CREATE TABLE example_table (id INTEGER);";
+ private static final String INSERT_STATEMENT =
+ "INSERT INTO example_table (id) VALUES (?);";
+ private static final String QUERY = "SELECT id as a, id as b FROM example_table;";
+ private static final String DROP_STATEMENT = "DROP TABLE example_table;";
+ private static final String ORIGINAL_COLUMN_NAME = "ID";
+ private static final String COLUMN_A = "A";
+ private static final String COLUMN_B = "B";
+
+ @Before
+ public void setUp() throws Exception {
+ String url = "jdbc:h2:mem:JdbcAliasToArrowTest";
+ String driver = "org.h2.Driver";
+ Class.forName(driver);
+ conn = DriverManager.getConnection(url);
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(CREATE_STATEMENT);
+ }
+ }
+
+ /**
+ * Test h2 database query with alias for column name and column label.
+ * To vetify reading field alias from an H2 database works as expected.
+ * If this test fails, something is either wrong with the setup,
+ * or the H2 SQL behavior changed.
+ */
+ @Test
+ public void testReadH2Alias() throws Exception {
+ // insert rows
+ int rowCount = 4;
+ insertRows(rowCount);
+
+ try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+ ResultSetMetaData rsmd = resultSet.getMetaData();
+ assertEquals(2, rsmd.getColumnCount());
+
+ // check column name and column label
+ assertEquals(ORIGINAL_COLUMN_NAME, rsmd.getColumnName(1));
+ assertEquals(COLUMN_A, rsmd.getColumnLabel(1));
+ assertEquals(ORIGINAL_COLUMN_NAME, rsmd.getColumnName(2));
+ assertEquals(COLUMN_B, rsmd.getColumnLabel(2));
+
+ int rowNum = 0;
+
+ while (resultSet.next()) {
+ assertEquals(rowNum, resultSet.getInt(COLUMN_A));
+ assertEquals(rowNum, resultSet.getInt(COLUMN_B));
+ ++rowNum;
+ }
+
+ assertEquals(rowCount, rowNum);
+ }
+ }
+
+ /**
+ * Test jdbc query results with alias to arrow works expected.
+ * Arrow result schema name should be field alias name.
+ */
+ @Test
+ public void testJdbcAliasToArrow() throws Exception {
+ int rowCount = 4;
+ insertRows(rowCount);
+
+ try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+ final VectorSchemaRoot vector =
+ sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE));
+
+ assertEquals(rowCount, vector.getRowCount());
+ Schema vectorSchema = vector.getSchema();
+ List<Field> vectorFields = vectorSchema.getFields();
+ assertEquals(vectorFields.get(0).getName(), COLUMN_A);
+ assertEquals(vectorFields.get(1).getName(), COLUMN_B);
+ }
+ }
+
+ @After
+ public void tearDown() throws SQLException {
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(DROP_STATEMENT);
+ } finally {
+ if (conn != null) {
+ conn.close();
+ conn = null;
+ }
+ }
+ }
+
+ private void insertRows(int numRows) throws SQLException {
+ // Insert [numRows] Rows
+ try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) {
+ for (int i = 0; i < numRows; ++i) {
+ stmt.setInt(1, i);
+ stmt.executeUpdate();
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java
new file mode 100644
index 000000000..b7dc1ee58
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java
@@ -0,0 +1,375 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+
+import java.sql.Array;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class JdbcToArrowArrayTest {
+ private Connection conn = null;
+
+ private static final String CREATE_STATEMENT =
+ "CREATE TABLE array_table (id INTEGER, int_array ARRAY, float_array ARRAY, string_array ARRAY);";
+ private static final String INSERT_STATEMENT =
+ "INSERT INTO array_table (id, int_array, float_array, string_array) VALUES (?, ?, ?, ?);";
+ private static final String QUERY = "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;";
+ private static final String DROP_STATEMENT = "DROP TABLE array_table;";
+
+ private static Map<String, JdbcFieldInfo> arrayFieldMapping;
+
+ private static final String INT_ARRAY_FIELD_NAME = "INT_ARRAY";
+ private static final String FLOAT_ARRAY_FIELD_NAME = "FLOAT_ARRAY";
+ private static final String STRING_ARRAY_FIELD_NAME = "STRING_ARRAY";
+
+ @Before
+ public void setUp() throws Exception {
+ String url = "jdbc:h2:mem:JdbcToArrowTest";
+ String driver = "org.h2.Driver";
+ Class.forName(driver);
+ conn = DriverManager.getConnection(url);
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(CREATE_STATEMENT);
+ }
+
+ arrayFieldMapping = new HashMap<String, JdbcFieldInfo>();
+ arrayFieldMapping.put(INT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.INTEGER));
+ arrayFieldMapping.put(FLOAT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.REAL));
+ arrayFieldMapping.put(STRING_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.VARCHAR));
+ }
+
+ // This test verifies reading an array field from an H2 database
+ // works as expected. If this test fails, something is either wrong
+ // with the setup, or the H2 SQL behavior changed.
+ @Test
+ public void testReadH2Array() throws Exception {
+ int rowCount = 4;
+
+ Integer[][] intArrays = generateIntegerArrayField(rowCount);
+ Float[][] floatArrays = generateFloatArrayField(rowCount);
+ String[][] strArrays = generateStringArrayField(rowCount);
+
+ insertRows(rowCount, intArrays, floatArrays, strArrays);
+
+ try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+ ResultSetMetaData rsmd = resultSet.getMetaData();
+ assertEquals(3, rsmd.getColumnCount());
+
+ for (int i = 1; i <= rsmd.getColumnCount(); ++i) {
+ assertEquals(Types.ARRAY, rsmd.getColumnType(i));
+ }
+
+ int rowNum = 0;
+
+ while (resultSet.next()) {
+ Array intArray = resultSet.getArray(INT_ARRAY_FIELD_NAME);
+ assertFalse(resultSet.wasNull());
+ try (ResultSet rs = intArray.getResultSet()) {
+ int arrayIndex = 0;
+ while (rs.next()) {
+ assertEquals(intArrays[rowNum][arrayIndex].intValue(), rs.getInt(2));
+ ++arrayIndex;
+ }
+ assertEquals(intArrays[rowNum].length, arrayIndex);
+ }
+
+ Array floatArray = resultSet.getArray(FLOAT_ARRAY_FIELD_NAME);
+ assertFalse(resultSet.wasNull());
+ try (ResultSet rs = floatArray.getResultSet()) {
+ int arrayIndex = 0;
+ while (rs.next()) {
+ assertEquals(floatArrays[rowNum][arrayIndex].floatValue(), rs.getFloat(2), 0.001);
+ ++arrayIndex;
+ }
+ assertEquals(floatArrays[rowNum].length, arrayIndex);
+ }
+
+ Array strArray = resultSet.getArray(STRING_ARRAY_FIELD_NAME);
+ assertFalse(resultSet.wasNull());
+ try (ResultSet rs = strArray.getResultSet()) {
+ int arrayIndex = 0;
+ while (rs.next()) {
+ assertEquals(strArrays[rowNum][arrayIndex], rs.getString(2));
+ ++arrayIndex;
+ }
+ assertEquals(strArrays[rowNum].length, arrayIndex);
+ }
+
+ ++rowNum;
+ }
+
+ assertEquals(rowCount, rowNum);
+ }
+ }
+
+ @Test
+ public void testJdbcToArrow() throws Exception {
+ int rowCount = 4;
+
+ Integer[][] intArrays = generateIntegerArrayField(rowCount);
+ Float[][] floatArrays = generateFloatArrayField(rowCount);
+ String[][] strArrays = generateStringArrayField(rowCount);
+
+ insertRows(rowCount, intArrays, floatArrays, strArrays);
+
+ final JdbcToArrowConfigBuilder builder =
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false);
+ builder.setArraySubTypeByColumnNameMap(arrayFieldMapping);
+
+ final JdbcToArrowConfig config = builder.build();
+
+ try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+ final VectorSchemaRoot vector = sqlToArrow(resultSet, config);
+
+ assertEquals(rowCount, vector.getRowCount());
+
+ assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays);
+ assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays);
+ assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays);
+ }
+ }
+
+ @Test
+ public void testJdbcToArrowWithNulls() throws Exception {
+ int rowCount = 4;
+
+ Integer[][] intArrays = {
+ null,
+ {0},
+ {1},
+ {},
+ };
+
+ Float[][] floatArrays = {
+ { 2.0f },
+ null,
+ { 3.0f },
+ {},
+ };
+
+ String[][] stringArrays = {
+ {"4"},
+ null,
+ {"5"},
+ {},
+ };
+
+ insertRows(rowCount, intArrays, floatArrays, stringArrays);
+
+ final JdbcToArrowConfigBuilder builder =
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false);
+ builder.setArraySubTypeByColumnNameMap(arrayFieldMapping);
+
+ final JdbcToArrowConfig config = builder.build();
+
+ try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) {
+ final VectorSchemaRoot vector = sqlToArrow(resultSet, config);
+
+ assertEquals(rowCount, vector.getRowCount());
+
+ assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays);
+ assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays);
+ assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays);
+ }
+ }
+
+ private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Integer[][] expectedValues) {
+ IntVector vector = (IntVector) listVector.getDataVector();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ int prevOffset = 0;
+ for (int row = 0; row < rowCount; ++row) {
+ int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH);
+
+ if (expectedValues[row] == null) {
+ assertEquals(0, listVector.isSet(row));
+ assertEquals(0, offset - prevOffset);
+ continue;
+ }
+
+ assertEquals(1, listVector.isSet(row));
+ assertEquals(expectedValues[row].length, offset - prevOffset);
+
+ for (int i = prevOffset; i < offset; ++i) {
+ assertEquals(expectedValues[row][i - prevOffset].intValue(), vector.get(i));
+ }
+
+ prevOffset = offset;
+ }
+ }
+
+ private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[][] expectedValues) {
+ Float4Vector vector = (Float4Vector) listVector.getDataVector();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ int prevOffset = 0;
+ for (int row = 0; row < rowCount; ++row) {
+ int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH);
+
+ if (expectedValues[row] == null) {
+ assertEquals(0, listVector.isSet(row));
+ assertEquals(0, offset - prevOffset);
+ continue;
+ }
+
+ assertEquals(1, listVector.isSet(row));
+ assertEquals(expectedValues[row].length, offset - prevOffset);
+
+ for (int i = prevOffset; i < offset; ++i) {
+ assertEquals(expectedValues[row][i - prevOffset].floatValue(), vector.get(i), 0);
+ }
+
+ prevOffset = offset;
+ }
+ }
+
+ private void assertStringVectorEquals(ListVector listVector, int rowCount, String[][] expectedValues) {
+ VarCharVector vector = (VarCharVector) listVector.getDataVector();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ int prevOffset = 0;
+ for (int row = 0; row < rowCount; ++row) {
+ int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH);
+
+ if (expectedValues[row] == null) {
+ assertEquals(0, listVector.isSet(row));
+ assertEquals(0, offset - prevOffset);
+ continue;
+ }
+
+ assertEquals(1, listVector.isSet(row));
+ assertEquals(expectedValues[row].length, offset - prevOffset);
+ for (int i = prevOffset; i < offset; ++i) {
+ assertArrayEquals(expectedValues[row][i - prevOffset].getBytes(), vector.get(i));
+ }
+
+ prevOffset = offset;
+ }
+ }
+
+ @After
+ public void tearDown() throws SQLException {
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(DROP_STATEMENT);
+ } finally {
+ if (conn != null) {
+ conn.close();
+ conn = null;
+ }
+ }
+ }
+
+ private Integer[][] generateIntegerArrayField(int numRows) {
+ Integer[][] result = new Integer[numRows][];
+
+ for (int i = 0; i < numRows; ++i) {
+ int val = i * 4;
+ result[i] = new Integer[]{val, val + 1, val + 2, val + 3};
+ }
+
+ return result;
+ }
+
+ private Float[][] generateFloatArrayField(int numRows) {
+ Float[][] result = new Float[numRows][];
+
+ for (int i = 0; i < numRows; ++i) {
+ int val = i * 4;
+ result[i] = new Float[]{(float) val, (float) val + 1, (float) val + 2, (float) val + 3};
+ }
+
+ return result;
+ }
+
+ private String[][] generateStringArrayField(int numRows) {
+ String[][] result = new String[numRows][];
+
+ for (int i = 0; i < numRows; ++i) {
+ int val = i * 4;
+ result[i] = new String[]{
+ String.valueOf(val),
+ String.valueOf(val + 1),
+ String.valueOf(val + 2),
+ String.valueOf(val + 3) };
+ }
+
+ return result;
+ }
+
+ private void insertRows(
+ int numRows,
+ Integer[][] integerArrays,
+ Float[][] floatArrays,
+ String[][] strArrays)
+ throws SQLException {
+
+ // Insert 4 Rows
+ try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) {
+
+ for (int i = 0; i < numRows; ++i) {
+ Integer[] integerArray = integerArrays[i];
+ Float[] floatArray = floatArrays[i];
+ String[] strArray = strArrays[i];
+
+ Array intArray = conn.createArrayOf("INT", integerArray);
+ Array realArray = conn.createArrayOf("REAL", floatArray);
+ Array varcharArray = conn.createArrayOf("VARCHAR", strArray);
+
+ // Insert Arrays of 4 Values in Each Row
+ stmt.setInt(1, i);
+ stmt.setArray(2, intArray);
+ stmt.setArray(3, realArray);
+ stmt.setArray(4, varcharArray);
+
+ stmt.executeUpdate();
+
+ intArray.free();
+ realArray.free();
+ varcharArray.free();
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java
new file mode 100644
index 000000000..b548c9169
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArrayWithCharSet;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.sql.DriverManager;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+
+import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
+import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with UTF-8 Charset,
+ * including the multi-byte CJK characters for H2 database.
+ */
+@RunWith(Parameterized.class)
+public class JdbcToArrowCharSetTest extends AbstractJdbcToArrowTest {
+
+ private static final String[] testFiles = {
+ "h2/test1_charset_h2.yml",
+ "h2/test1_charset_ch_h2.yml",
+ "h2/test1_charset_jp_h2.yml",
+ "h2/test1_charset_kr_h2.yml"
+ };
+
+ /**
+ * Constructor which populates the table object for each test iteration.
+ *
+ * @param table Table oject
+ */
+ public JdbcToArrowCharSetTest(Table table) {
+ this.table = table;
+ }
+
+ /**
+ * This method creates Connection object and DB table and also populate data into table for test.
+ *
+ * @throws SQLException on error
+ * @throws ClassNotFoundException on error
+ */
+ @Before
+ public void setUp() throws SQLException, ClassNotFoundException {
+ String url = "jdbc:h2:mem:JdbcToArrowTest?characterEncoding=UTF-8";
+ String driver = "org.h2.Driver";
+ Class.forName(driver);
+ conn = DriverManager.getConnection(url);
+ try (Statement stmt = conn.createStatement();) {
+ stmt.executeUpdate(table.getCreate());
+ for (String insert : table.getData()) {
+ stmt.executeUpdate(insert);
+ }
+ }
+ }
+
+ /**
+ * Get the test data as a collection of Table objects for each test iteration.
+ *
+ * @return Collection of Table objects
+ * @throws SQLException on error
+ * @throws ClassNotFoundException on error
+ * @throws IOException on error
+ */
+ @Parameters
+ public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException {
+ return Arrays.asList(prepareTestData(testFiles, JdbcToArrowCharSetTest.class));
+ }
+
+ /**
+ * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with UTF-8 Charset, including
+ * the multi-byte CJK characters.
+ */
+ @Test
+ public void testJdbcToArrowValues() throws SQLException, IOException {
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance()));
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ new RootAllocator(Integer.MAX_VALUE)));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ Calendar.getInstance()));
+ testDataSets(sqlToArrow(
+ conn.createStatement().executeQuery(table.getQuery()),
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()));
+ testDataSets(sqlToArrow(
+ conn,
+ table.getQuery(),
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()));
+ }
+
+ @Test
+ public void testJdbcSchemaMetadata() throws SQLException {
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build();
+ ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData();
+ Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config);
+ JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema);
+ }
+
+ /**
+ * This method calls the assert methods for various DataSets.
+ *
+ * @param root VectorSchemaRoot for test
+ */
+ public void testDataSets(VectorSchemaRoot root) {
+ JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);
+
+ assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(),
+ getCharArrayWithCharSet(table.getValues(), CLOB, StandardCharsets.UTF_8));
+
+ assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(),
+ getCharArrayWithCharSet(table.getValues(), VARCHAR, StandardCharsets.UTF_8));
+
+ assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(),
+ getCharArrayWithCharSet(table.getValues(), CHAR, StandardCharsets.UTF_8));
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
new file mode 100644
index 000000000..40db5c235
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBitVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBooleanVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDecimalVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat4VectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat8VectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertNullVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertSmallIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues;
+
+import java.io.IOException;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+
+import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
+import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types
+ * for H2 database using multiple test data files.
+ */
+@RunWith(Parameterized.class)
+public class JdbcToArrowDataTypesTest extends AbstractJdbcToArrowTest {
+
+ private static final String BIGINT = "big_int";
+ private static final String BINARY = "binary";
+ private static final String BIT = "bit";
+ private static final String BLOB = "blob";
+ private static final String BOOL = "bool";
+ private static final String CHAR = "char";
+ private static final String CLOB = "clob";
+ private static final String DATE = "date";
+ private static final String DECIMAL = "decimal";
+ private static final String DOUBLE = "double";
+ private static final String INT = "int";
+ private static final String REAL = "real";
+ private static final String SMALLINT = "small_int";
+ private static final String TIME = "time";
+ private static final String TIMESTAMP = "timestamp";
+ private static final String TINYINT = "tiny_int";
+ private static final String VARCHAR = "varchar";
+ private static final String NULL = "null";
+
+ private static final String[] testFiles = {
+ "h2/test1_bigint_h2.yml",
+ "h2/test1_binary_h2.yml",
+ "h2/test1_bit_h2.yml",
+ "h2/test1_blob_h2.yml",
+ "h2/test1_bool_h2.yml",
+ "h2/test1_char_h2.yml",
+ "h2/test1_clob_h2.yml",
+ "h2/test1_date_h2.yml",
+ "h2/test1_decimal_h2.yml",
+ "h2/test1_double_h2.yml",
+ "h2/test1_int_h2.yml",
+ "h2/test1_real_h2.yml",
+ "h2/test1_smallint_h2.yml",
+ "h2/test1_time_h2.yml",
+ "h2/test1_timestamp_h2.yml",
+ "h2/test1_tinyint_h2.yml",
+ "h2/test1_varchar_h2.yml",
+ "h2/test1_null_h2.yml"
+ };
+
+ /**
+ * Constructor which populates the table object for each test iteration.
+ *
+ * @param table Table object
+ */
+ public JdbcToArrowDataTypesTest(Table table) {
+ this.table = table;
+ }
+
+ /**
+ * Get the test data as a collection of Table objects for each test iteration.
+ *
+ * @return Collection of Table objects
+ * @throws SQLException on error
+ * @throws ClassNotFoundException on error
+ * @throws IOException on error
+ */
+ @Parameters
+ public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException {
+ return Arrays.asList(prepareTestData(testFiles, JdbcToArrowDataTypesTest.class));
+ }
+
+ /**
+ * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes.
+ */
+ @Test
+ public void testJdbcToArrowValues() throws SQLException, IOException {
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance()));
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ new RootAllocator(Integer.MAX_VALUE)));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()));
+ testDataSets(sqlToArrow(
+ conn.createStatement().executeQuery(table.getQuery()),
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()));
+ testDataSets(sqlToArrow(
+ conn,
+ table.getQuery(),
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()));
+ }
+
+ @Test
+ public void testJdbcSchemaMetadata() throws SQLException {
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build();
+ ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData();
+ Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config);
+ JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema);
+ }
+
+ /**
+ * This method calls the assert methods for various DataSets.
+ *
+ * @param root VectorSchemaRoot for test
+ */
+ public void testDataSets(VectorSchemaRoot root) {
+ JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);
+
+ switch (table.getType()) {
+ case BIGINT:
+ assertBigIntVectorValues((BigIntVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getLongValues());
+ break;
+ case BINARY:
+ case BLOB:
+ assertVarBinaryVectorValues((VarBinaryVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getBinaryValues());
+ break;
+ case BIT:
+ assertBitVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getIntValues());
+ break;
+ case BOOL:
+ assertBooleanVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getBoolValues());
+ break;
+ case CHAR:
+ case VARCHAR:
+ case CLOB:
+ assertVarcharVectorValues((VarCharVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getCharValues());
+ break;
+ case DATE:
+ assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getIntValues());
+ break;
+ case TIME:
+ assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getLongValues());
+ break;
+ case TIMESTAMP:
+ assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getLongValues());
+ break;
+ case DECIMAL:
+ assertDecimalVectorValues((DecimalVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getBigDecimalValues());
+ break;
+ case DOUBLE:
+ assertFloat8VectorValues((Float8Vector) root.getVector(table.getVector()), table.getValues().length,
+ table.getDoubleValues());
+ break;
+ case INT:
+ assertIntVectorValues((IntVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getIntValues());
+ break;
+ case SMALLINT:
+ assertSmallIntVectorValues((SmallIntVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getIntValues());
+ break;
+ case TINYINT:
+ assertTinyIntVectorValues((TinyIntVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getIntValues());
+ break;
+ case REAL:
+ assertFloat4VectorValues((Float4Vector) root.getVector(table.getVector()), table.getValues().length,
+ table.getFloatValues());
+ break;
+ case NULL:
+ assertNullVectorValues((NullVector) root.getVector(table.getVector()), table.getRowCount());
+ break;
+ default:
+ // do nothing
+ break;
+ }
+ }
+}
+
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java
new file mode 100644
index 000000000..71cc70056
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java
@@ -0,0 +1,277 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBitVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBooleanVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDecimalVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat4VectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat8VectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertNullValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertSmallIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBooleanValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArray;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDecimalValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues;
+
+import java.io.IOException;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+
+import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
+import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with null values for
+ * H2 database.
+ */
+@RunWith(Parameterized.class)
+public class JdbcToArrowNullTest extends AbstractJdbcToArrowTest {
+
+ private static final String NULL = "null";
+ private static final String SELECTED_NULL_ROW = "selected_null_row";
+ private static final String SELECTED_NULL_COLUMN = "selected_null_column";
+
+ private static final String[] testFiles = {
+ "h2/test1_all_datatypes_null_h2.yml",
+ "h2/test1_selected_datatypes_null_h2.yml",
+ "h2/test1_all_datatypes_selected_null_rows_h2.yml"
+ };
+
+ /**
+ * Constructor which populates the table object for each test iteration.
+ *
+ * @param table Table object
+ */
+ public JdbcToArrowNullTest(Table table) {
+ this.table = table;
+ }
+
+ /**
+ * Get the test data as a collection of Table objects for each test iteration.
+ *
+ * @return Collection of Table objects
+ * @throws SQLException on error
+ * @throws ClassNotFoundException on error
+ * @throws IOException on error
+ */
+ @Parameters
+ public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException {
+ return Arrays.asList(prepareTestData(testFiles, JdbcToArrowNullTest.class));
+ }
+
+ /**
+ * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null values.
+ */
+ @Test
+ public void testJdbcToArrowValues() throws SQLException, IOException {
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance()));
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ new RootAllocator(Integer.MAX_VALUE)));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()));
+ testDataSets(sqlToArrow(
+ conn.createStatement().executeQuery(table.getQuery()),
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()));
+ testDataSets(sqlToArrow(
+ conn,
+ table.getQuery(),
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()));
+ }
+
+ @Test
+ public void testJdbcSchemaMetadata() throws SQLException {
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build();
+ ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData();
+ Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config);
+ JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema);
+ }
+
+ /**
+ * This method calls the assert methods for various DataSets.
+ *
+ * @param root VectorSchemaRoot for test
+ */
+ public void testDataSets(VectorSchemaRoot root) {
+ JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);
+
+ switch (table.getType()) {
+ case NULL:
+ sqlToArrowTestNullValues(table.getVectors(), root, table.getRowCount());
+ break;
+ case SELECTED_NULL_COLUMN:
+ sqlToArrowTestSelectedNullColumnsValues(table.getVectors(), root, table.getRowCount());
+ break;
+ case SELECTED_NULL_ROW:
+ testAllVectorValues(root);
+ break;
+ default:
+ // do nothing
+ break;
+ }
+ }
+
+ private void testAllVectorValues(VectorSchemaRoot root) {
+ JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);
+
+ assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(),
+ getLongValues(table.getValues(), BIGINT));
+
+ assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(),
+ getIntValues(table.getValues(), TINYINT));
+
+ assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(),
+ getIntValues(table.getValues(), SMALLINT));
+
+ assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(),
+ getBinaryValues(table.getValues(), BINARY));
+
+ assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(),
+ getBinaryValues(table.getValues(), BLOB));
+
+ assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(),
+ getCharArray(table.getValues(), CLOB));
+
+ assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(),
+ getCharArray(table.getValues(), VARCHAR));
+
+ assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(),
+ getCharArray(table.getValues(), CHAR));
+
+ assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(),
+ getIntValues(table.getValues(), INT));
+
+ assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(),
+ getIntValues(table.getValues(), BIT));
+
+ assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(),
+ getBooleanValues(table.getValues(), BOOL));
+
+ assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(),
+ getIntValues(table.getValues(), DATE));
+
+ assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), table.getRowCount(),
+ getLongValues(table.getValues(), TIME));
+
+ assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(),
+ getLongValues(table.getValues(), TIMESTAMP));
+
+ assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(),
+ getDecimalValues(table.getValues(), DECIMAL));
+
+ assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(),
+ getDoubleValues(table.getValues(), DOUBLE));
+
+ assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(),
+ getFloatValues(table.getValues(), REAL));
+ }
+
+ /**
+ * This method assert tests null values in vectors for all the datatypes.
+ *
+ * @param vectors Vectors to test
+ * @param root VectorSchemaRoot for test
+ * @param rowCount number of rows
+ */
+ public void sqlToArrowTestNullValues(String[] vectors, VectorSchemaRoot root, int rowCount) {
+ assertNullValues((IntVector) root.getVector(vectors[0]), rowCount);
+ assertNullValues((BitVector) root.getVector(vectors[1]), rowCount);
+ assertNullValues((TinyIntVector) root.getVector(vectors[2]), rowCount);
+ assertNullValues((SmallIntVector) root.getVector(vectors[3]), rowCount);
+ assertNullValues((BigIntVector) root.getVector(vectors[4]), rowCount);
+ assertNullValues((DecimalVector) root.getVector(vectors[5]), rowCount);
+ assertNullValues((Float8Vector) root.getVector(vectors[6]), rowCount);
+ assertNullValues((Float4Vector) root.getVector(vectors[7]), rowCount);
+ assertNullValues((TimeMilliVector) root.getVector(vectors[8]), rowCount);
+ assertNullValues((DateDayVector) root.getVector(vectors[9]), rowCount);
+ assertNullValues((TimeStampVector) root.getVector(vectors[10]), rowCount);
+ assertNullValues((VarBinaryVector) root.getVector(vectors[11]), rowCount);
+ assertNullValues((VarCharVector) root.getVector(vectors[12]), rowCount);
+ assertNullValues((VarBinaryVector) root.getVector(vectors[13]), rowCount);
+ assertNullValues((VarCharVector) root.getVector(vectors[14]), rowCount);
+ assertNullValues((VarCharVector) root.getVector(vectors[15]), rowCount);
+ assertNullValues((BitVector) root.getVector(vectors[16]), rowCount);
+ }
+
+ /**
+ * This method assert tests null values in vectors for some selected datatypes.
+ *
+ * @param vectors Vectors to test
+ * @param root VectorSchemaRoot for test
+ * @param rowCount number of rows
+ */
+ public void sqlToArrowTestSelectedNullColumnsValues(String[] vectors, VectorSchemaRoot root, int rowCount) {
+ assertNullValues((BigIntVector) root.getVector(vectors[0]), rowCount);
+ assertNullValues((DecimalVector) root.getVector(vectors[1]), rowCount);
+ assertNullValues((Float8Vector) root.getVector(vectors[2]), rowCount);
+ assertNullValues((Float4Vector) root.getVector(vectors[3]), rowCount);
+ assertNullValues((TimeMilliVector) root.getVector(vectors[4]), rowCount);
+ assertNullValues((DateDayVector) root.getVector(vectors[5]), rowCount);
+ assertNullValues((TimeStampVector) root.getVector(vectors[6]), rowCount);
+ assertNullValues((VarBinaryVector) root.getVector(vectors[7]), rowCount);
+ assertNullValues((VarCharVector) root.getVector(vectors[8]), rowCount);
+ assertNullValues((VarBinaryVector) root.getVector(vectors[9]), rowCount);
+ assertNullValues((VarCharVector) root.getVector(vectors[10]), rowCount);
+ assertNullValues((VarCharVector) root.getVector(vectors[11]), rowCount);
+ assertNullValues((BitVector) root.getVector(vectors[12]), rowCount);
+ }
+
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java
new file mode 100644
index 000000000..84960dc88
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
+import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality for
+ * (non-)optional columns, in particular with regard to the ensuing VectorSchemaRoot's schema.
+ */
+@RunWith(Parameterized.class)
+public class JdbcToArrowOptionalColumnsTest extends AbstractJdbcToArrowTest {
+ private static final String[] testFiles = {
+ "h2/test1_null_and_notnull.yml"
+ };
+
+ /**
+ * Constructor which populates the table object for each test iteration.
+ *
+ * @param table Table object
+ */
+ public JdbcToArrowOptionalColumnsTest(Table table) {
+ this.table = table;
+ }
+
+ /**
+ * Get the test data as a collection of Table objects for each test iteration.
+ *
+ * @return Collection of Table objects
+ * @throws SQLException on error
+ * @throws ClassNotFoundException on error
+ * @throws IOException on error
+ */
+ @Parameterized.Parameters
+ public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException {
+ return Arrays.asList(prepareTestData(testFiles, JdbcToArrowOptionalColumnsTest.class));
+ }
+
+ /**
+ * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable columns.
+ */
+ @Test
+ public void testJdbcToArrowValues() throws SQLException, IOException {
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)));
+ }
+
+ /**
+ * This method calls the assert methods for various DataSets. We verify that a SQL `NULL` column becomes
+ * nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes non-nullable.
+ *
+ * @param root VectorSchemaRoot for test
+ */
+ public void testDataSets(VectorSchemaRoot root) {
+ JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);
+
+ assertTrue(root.getSchema().getFields().get(0).isNullable());
+ assertFalse(root.getSchema().getFields().get(1).isNullable());
+ }
+
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
new file mode 100644
index 000000000..ca1c0c00b
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
@@ -0,0 +1,1350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBitVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBooleanVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDecimalVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat4VectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat8VectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertNullVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertSmallIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBooleanValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArray;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDecimalValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.math.BigDecimal;
+import java.net.URL;
+import java.sql.Array;
+import java.sql.Blob;
+import java.sql.Clob;
+import java.sql.Date;
+import java.sql.NClob;
+import java.sql.Ref;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.RowId;
+import java.sql.SQLException;
+import java.sql.SQLWarning;
+import java.sql.SQLXML;
+import java.sql.Statement;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
+import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
+import org.apache.arrow.adapter.jdbc.JdbcToArrow;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
+import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types
+ * for H2 database using single test data file.
+ */
+@RunWith(Parameterized.class)
+public class JdbcToArrowTest extends AbstractJdbcToArrowTest {
+
+ private static final String[] testFiles = {"h2/test1_all_datatypes_h2.yml"};
+
+ /**
+ * Constructor which populates the table object for each test iteration.
+ *
+ * @param table Table object
+ * @param reuseVectorSchemaRoot A flag indicating if we should reuse vector schema roots.
+ */
+ public JdbcToArrowTest(Table table, boolean reuseVectorSchemaRoot) {
+ this.table = table;
+ this.reuseVectorSchemaRoot = reuseVectorSchemaRoot;
+ }
+
+ /**
+ * Get the test data as a collection of Table objects for each test iteration.
+ *
+ * @return Collection of Table objects
+ * @throws SQLException on error
+ * @throws ClassNotFoundException on error
+ * @throws IOException on error
+ */
+ @Parameterized.Parameters(name = "table = {0}, reuse batch = {1}")
+ public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException {
+ return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTest.class)).flatMap(row ->
+ Stream.of(new Object[] {row[0], true}, new Object[] {row[0], false})).collect(Collectors.toList());
+ }
+
+ /**
+ * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one test data file.
+ */
+ @Test
+ public void testJdbcToArrowValues() throws SQLException, IOException {
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance()));
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ new RootAllocator(Integer.MAX_VALUE)));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ Calendar.getInstance()));
+ testDataSets(sqlToArrow(
+ conn.createStatement().executeQuery(table.getQuery()),
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()));
+ testDataSets(sqlToArrow(
+ conn,
+ table.getQuery(),
+ new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()));
+ }
+
+ @Test
+ public void testJdbcSchemaMetadata() throws SQLException {
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true)
+ .setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build();
+ ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData();
+ Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config);
+ JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema);
+ }
+
+ /**
+ * This method calls the assert methods for various DataSets.
+ *
+ * @param root VectorSchemaRoot for test
+ */
+ public void testDataSets(VectorSchemaRoot root) {
+ JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);
+
+ assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(),
+ getLongValues(table.getValues(), BIGINT));
+
+ assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(),
+ getIntValues(table.getValues(), TINYINT));
+
+ assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(),
+ getIntValues(table.getValues(), SMALLINT));
+
+ assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(),
+ getBinaryValues(table.getValues(), BINARY));
+
+ assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(),
+ getBinaryValues(table.getValues(), BLOB));
+
+ assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(),
+ getCharArray(table.getValues(), CLOB));
+
+ assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(),
+ getCharArray(table.getValues(), VARCHAR));
+
+ assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(),
+ getCharArray(table.getValues(), CHAR));
+
+ assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(),
+ getIntValues(table.getValues(), INT));
+
+ assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(),
+ getIntValues(table.getValues(), BIT));
+
+ assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(),
+ getBooleanValues(table.getValues(), BOOL));
+
+ assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(),
+ getIntValues(table.getValues(), DATE));
+
+ assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), table.getRowCount(),
+ getLongValues(table.getValues(), TIME));
+
+ assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(),
+ getLongValues(table.getValues(), TIMESTAMP));
+
+ assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(),
+ getDecimalValues(table.getValues(), DECIMAL));
+
+ assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(),
+ getDoubleValues(table.getValues(), DOUBLE));
+
+ assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(),
+ getFloatValues(table.getValues(), REAL));
+
+ assertNullVectorValues((NullVector) root.getVector(NULL), table.getRowCount());
+ }
+
+ @Test
+ public void runLargeNumberOfRows() throws IOException, SQLException {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ int x = 0;
+ final int targetRows = 600000;
+ ResultSet rs = new FakeResultSet(targetRows);
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(
+ allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false)
+ .setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build();
+
+ try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) {
+ while (iter.hasNext()) {
+ VectorSchemaRoot root = iter.next();
+ x += root.getRowCount();
+ if (!reuseVectorSchemaRoot) {
+ root.close();
+ }
+ }
+ } finally {
+ allocator.close();
+ }
+
+ assertEquals(x, targetRows);
+ }
+
+ private class FakeResultSet implements ResultSet {
+
+ public int numRows;
+
+ FakeResultSet(int numRows) {
+ this.numRows = numRows;
+ }
+
+ @Override
+ public boolean next() throws SQLException {
+ numRows--;
+ return numRows >= 0;
+ }
+
+ @Override
+ public void close() throws SQLException {
+
+ }
+
+ @Override
+ public boolean wasNull() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public String getString(int columnIndex) throws SQLException {
+ return "test123test123" + numRows;
+ }
+
+ @Override
+ public boolean getBoolean(int columnIndex) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public byte getByte(int columnIndex) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public short getShort(int columnIndex) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public int getInt(int columnIndex) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public long getLong(int columnIndex) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public float getFloat(int columnIndex) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public double getDouble(int columnIndex) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public BigDecimal getBigDecimal(int columnIndex, int scale) throws SQLException {
+ return new BigDecimal(5);
+ }
+
+ @Override
+ public byte[] getBytes(int columnIndex) throws SQLException {
+ return new byte[0];
+ }
+
+ @Override
+ public Date getDate(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Time getTime(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Timestamp getTimestamp(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public InputStream getAsciiStream(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public InputStream getUnicodeStream(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public InputStream getBinaryStream(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public String getString(String columnLabel) throws SQLException {
+ throw new UnsupportedOperationException("get column by label not supported");
+ }
+
+ @Override
+ public boolean getBoolean(String columnLabel) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public byte getByte(String columnLabel) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public short getShort(String columnLabel) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public int getInt(String columnLabel) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public long getLong(String columnLabel) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public float getFloat(String columnLabel) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public double getDouble(String columnLabel) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public BigDecimal getBigDecimal(String columnLabel, int scale) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public byte[] getBytes(String columnLabel) throws SQLException {
+ return new byte[0];
+ }
+
+ @Override
+ public Date getDate(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Time getTime(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Timestamp getTimestamp(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public InputStream getAsciiStream(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public InputStream getUnicodeStream(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public InputStream getBinaryStream(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public SQLWarning getWarnings() throws SQLException {
+ return null;
+ }
+
+ @Override
+ public void clearWarnings() throws SQLException {
+
+ }
+
+ @Override
+ public String getCursorName() throws SQLException {
+ return null;
+ }
+
+ @Override
+ public ResultSetMetaData getMetaData() throws SQLException {
+ return new ResultSetMetaData() {
+ @Override
+ public int getColumnCount() throws SQLException {
+ return 5;
+ }
+
+ @Override
+ public boolean isAutoIncrement(int column) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean isCaseSensitive(int column) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean isSearchable(int column) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean isCurrency(int column) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public int isNullable(int column) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public boolean isSigned(int column) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public int getColumnDisplaySize(int column) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public String getColumnLabel(int column) throws SQLException {
+ return getColumnName(column);
+ }
+
+ @Override
+ public String getColumnName(int column) throws SQLException {
+ return "col_" + column;
+ }
+
+ @Override
+ public String getSchemaName(int column) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public int getPrecision(int column) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public int getScale(int column) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public String getTableName(int column) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public String getCatalogName(int column) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public int getColumnType(int column) throws SQLException {
+ switch (column) {
+ case 1:
+ return Types.VARCHAR;
+ case 2:
+ return Types.INTEGER;
+ case 3:
+ return Types.BIGINT;
+ case 4:
+ return Types.VARCHAR;
+ case 5:
+ return Types.VARCHAR;
+ default:
+ throw new IllegalArgumentException("not supported");
+ }
+
+ }
+
+ @Override
+ public String getColumnTypeName(int column) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public boolean isReadOnly(int column) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean isWritable(int column) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean isDefinitelyWritable(int column) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public String getColumnClassName(int column) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public <T> T unwrap(Class<T> iface) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public boolean isWrapperFor(Class<?> iface) throws SQLException {
+ return false;
+ }
+ };
+ }
+
+ @Override
+ public Object getObject(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Object getObject(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public int findColumn(String columnLabel) throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public Reader getCharacterStream(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Reader getCharacterStream(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public BigDecimal getBigDecimal(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public BigDecimal getBigDecimal(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public boolean isBeforeFirst() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean isAfterLast() throws SQLException {
+ return numRows < 0;
+ }
+
+ @Override
+ public boolean isFirst() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean isLast() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public void beforeFirst() throws SQLException {
+
+ }
+
+ @Override
+ public void afterLast() throws SQLException {
+
+ }
+
+ @Override
+ public boolean first() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean last() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public int getRow() throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public boolean absolute(int row) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean relative(int rows) throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean previous() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public void setFetchDirection(int direction) throws SQLException {
+
+ }
+
+ @Override
+ public int getFetchDirection() throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public void setFetchSize(int rows) throws SQLException {
+
+ }
+
+ @Override
+ public int getFetchSize() throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public int getType() throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public int getConcurrency() throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public boolean rowUpdated() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean rowInserted() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public boolean rowDeleted() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public void updateNull(int columnIndex) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBoolean(int columnIndex, boolean x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateByte(int columnIndex, byte x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateShort(int columnIndex, short x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateInt(int columnIndex, int x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateLong(int columnIndex, long x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateFloat(int columnIndex, float x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateDouble(int columnIndex, double x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBigDecimal(int columnIndex, BigDecimal x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateString(int columnIndex, String x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBytes(int columnIndex, byte[] x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateDate(int columnIndex, Date x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateTime(int columnIndex, Time x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateTimestamp(int columnIndex, Timestamp x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateAsciiStream(int columnIndex, InputStream x, int length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBinaryStream(int columnIndex, InputStream x, int length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateCharacterStream(int columnIndex, Reader x, int length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateObject(int columnIndex, Object x, int scaleOrLength) throws SQLException {
+
+ }
+
+ @Override
+ public void updateObject(int columnIndex, Object x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNull(String columnLabel) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBoolean(String columnLabel, boolean x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateByte(String columnLabel, byte x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateShort(String columnLabel, short x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateInt(String columnLabel, int x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateLong(String columnLabel, long x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateFloat(String columnLabel, float x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateDouble(String columnLabel, double x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBigDecimal(String columnLabel, BigDecimal x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateString(String columnLabel, String x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBytes(String columnLabel, byte[] x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateDate(String columnLabel, Date x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateTime(String columnLabel, Time x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateTimestamp(String columnLabel, Timestamp x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateAsciiStream(String columnLabel, InputStream x, int length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBinaryStream(String columnLabel, InputStream x, int length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateCharacterStream(String columnLabel, Reader reader, int length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateObject(String columnLabel, Object x, int scaleOrLength) throws SQLException {
+
+ }
+
+ @Override
+ public void updateObject(String columnLabel, Object x) throws SQLException {
+
+ }
+
+ @Override
+ public void insertRow() throws SQLException {
+
+ }
+
+ @Override
+ public void updateRow() throws SQLException {
+
+ }
+
+ @Override
+ public void deleteRow() throws SQLException {
+
+ }
+
+ @Override
+ public void refreshRow() throws SQLException {
+
+ }
+
+ @Override
+ public void cancelRowUpdates() throws SQLException {
+
+ }
+
+ @Override
+ public void moveToInsertRow() throws SQLException {
+
+ }
+
+ @Override
+ public void moveToCurrentRow() throws SQLException {
+
+ }
+
+ @Override
+ public Statement getStatement() throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Object getObject(int columnIndex, Map<String, Class<?>> map) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Ref getRef(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Blob getBlob(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Clob getClob(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Array getArray(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Object getObject(String columnLabel, Map<String, Class<?>> map) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Ref getRef(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Blob getBlob(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Clob getClob(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Array getArray(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Date getDate(int columnIndex, Calendar cal) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Date getDate(String columnLabel, Calendar cal) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Time getTime(int columnIndex, Calendar cal) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Time getTime(String columnLabel, Calendar cal) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Timestamp getTimestamp(int columnIndex, Calendar cal) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Timestamp getTimestamp(String columnLabel, Calendar cal) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public URL getURL(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public URL getURL(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public void updateRef(int columnIndex, Ref x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateRef(String columnLabel, Ref x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBlob(int columnIndex, Blob x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBlob(String columnLabel, Blob x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateClob(int columnIndex, Clob x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateClob(String columnLabel, Clob x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateArray(int columnIndex, Array x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateArray(String columnLabel, Array x) throws SQLException {
+
+ }
+
+ @Override
+ public RowId getRowId(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public RowId getRowId(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public void updateRowId(int columnIndex, RowId x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateRowId(String columnLabel, RowId x) throws SQLException {
+
+ }
+
+ @Override
+ public int getHoldability() throws SQLException {
+ return 0;
+ }
+
+ @Override
+ public boolean isClosed() throws SQLException {
+ return false;
+ }
+
+ @Override
+ public void updateNString(int columnIndex, String nString) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNString(String columnLabel, String nString) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNClob(int columnIndex, NClob nClob) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNClob(String columnLabel, NClob nClob) throws SQLException {
+
+ }
+
+ @Override
+ public NClob getNClob(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public NClob getNClob(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public SQLXML getSQLXML(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public SQLXML getSQLXML(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public void updateSQLXML(int columnIndex, SQLXML xmlObject) throws SQLException {
+
+ }
+
+ @Override
+ public void updateSQLXML(String columnLabel, SQLXML xmlObject) throws SQLException {
+
+ }
+
+ @Override
+ public String getNString(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public String getNString(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Reader getNCharacterStream(int columnIndex) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public Reader getNCharacterStream(String columnLabel) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public void updateNCharacterStream(int columnIndex, Reader x, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNCharacterStream(String columnLabel, Reader reader, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateAsciiStream(int columnIndex, InputStream x, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBinaryStream(int columnIndex, InputStream x, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateCharacterStream(int columnIndex, Reader x, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateAsciiStream(String columnLabel, InputStream x, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBinaryStream(String columnLabel, InputStream x, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateCharacterStream(String columnLabel, Reader reader, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBlob(int columnIndex, InputStream inputStream, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBlob(String columnLabel, InputStream inputStream, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateClob(int columnIndex, Reader reader, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateClob(String columnLabel, Reader reader, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNClob(int columnIndex, Reader reader, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNClob(String columnLabel, Reader reader, long length) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNCharacterStream(int columnIndex, Reader x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNCharacterStream(String columnLabel, Reader reader) throws SQLException {
+
+ }
+
+ @Override
+ public void updateAsciiStream(int columnIndex, InputStream x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBinaryStream(int columnIndex, InputStream x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateCharacterStream(int columnIndex, Reader x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateAsciiStream(String columnLabel, InputStream x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBinaryStream(String columnLabel, InputStream x) throws SQLException {
+
+ }
+
+ @Override
+ public void updateCharacterStream(String columnLabel, Reader reader) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBlob(int columnIndex, InputStream inputStream) throws SQLException {
+
+ }
+
+ @Override
+ public void updateBlob(String columnLabel, InputStream inputStream) throws SQLException {
+
+ }
+
+ @Override
+ public void updateClob(int columnIndex, Reader reader) throws SQLException {
+
+ }
+
+ @Override
+ public void updateClob(String columnLabel, Reader reader) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNClob(int columnIndex, Reader reader) throws SQLException {
+
+ }
+
+ @Override
+ public void updateNClob(String columnLabel, Reader reader) throws SQLException {
+
+ }
+
+ @Override
+ public <T> T getObject(int columnIndex, Class<T> type) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public <T> T getObject(String columnLabel, Class<T> type) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public <T> T unwrap(Class<T> iface) throws SQLException {
+ return null;
+ }
+
+ @Override
+ public boolean isWrapperFor(Class<?> iface) throws SQLException {
+ return false;
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java
new file mode 100644
index 000000000..f5ddbdb9b
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues;
+
+import java.io.IOException;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.TimeZone;
+
+import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
+import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with TimeZone based Date,
+ * Time and Timestamp datatypes for H2 database.
+ */
+
+@RunWith(Parameterized.class)
+public class JdbcToArrowTimeZoneTest extends AbstractJdbcToArrowTest {
+
+ private static final String EST_DATE = "est_date";
+ private static final String EST_TIME = "est_time";
+ private static final String EST_TIMESTAMP = "est_timestamp";
+ private static final String GMT_DATE = "gmt_date";
+ private static final String GMT_TIME = "gmt_time";
+ private static final String GMT_TIMESTAMP = "gmt_timestamp";
+ private static final String PST_DATE = "pst_date";
+ private static final String PST_TIME = "pst_time";
+ private static final String PST_TIMESTAMP = "pst_timestamp";
+
+ private static final String[] testFiles = {
+ "h2/test1_est_date_h2.yml",
+ "h2/test1_est_time_h2.yml",
+ "h2/test1_est_timestamp_h2.yml",
+ "h2/test1_gmt_date_h2.yml",
+ "h2/test1_gmt_time_h2.yml",
+ "h2/test1_gmt_timestamp_h2.yml",
+ "h2/test1_pst_date_h2.yml",
+ "h2/test1_pst_time_h2.yml",
+ "h2/test1_pst_timestamp_h2.yml"
+ };
+
+ /**
+ * Constructor which populates the table object for each test iteration.
+ *
+ * @param table Table object
+ */
+ public JdbcToArrowTimeZoneTest(Table table) {
+ this.table = table;
+ }
+
+ /**
+ * Get the test data as a collection of Table objects for each test iteration.
+ *
+ * @return Collection of Table objects
+ * @throws SQLException on error
+ * @throws ClassNotFoundException on error
+ * @throws IOException on error
+ */
+ @Parameters
+ public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException {
+ return Arrays.asList(prepareTestData(testFiles, JdbcToArrowTimeZoneTest.class));
+ }
+
+ /**
+ * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with TimeZone based Date,
+ * Time and Timestamp datatype.
+ */
+ @Test
+ public void testJdbcToArrowValues() throws SQLException, IOException {
+ testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))));
+ testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
+ Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))));
+ testDataSets(sqlToArrow(
+ conn.createStatement().executeQuery(table.getQuery()),
+ new JdbcToArrowConfigBuilder(
+ new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build()));
+ testDataSets(sqlToArrow(
+ conn,
+ table.getQuery(),
+ new JdbcToArrowConfigBuilder(
+ new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build()));
+ }
+
+ @Test
+ public void testJdbcSchemaMetadata() throws SQLException {
+ Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()));
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build();
+ ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData();
+ Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config);
+ JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema);
+ }
+
+ /**
+ * This method calls the assert methods for various DataSets.
+ *
+ * @param root VectorSchemaRoot for test
+ */
+ public void testDataSets(VectorSchemaRoot root) {
+ JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);
+
+ switch (table.getType()) {
+ case EST_DATE:
+ case GMT_DATE:
+ case PST_DATE:
+ assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getIntValues());
+ break;
+ case EST_TIME:
+ case GMT_TIME:
+ case PST_TIME:
+ assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getLongValues());
+ break;
+ case EST_TIMESTAMP:
+ case GMT_TIMESTAMP:
+ case PST_TIMESTAMP:
+ assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length,
+ table.getLongValues());
+ break;
+ default:
+ // do nothing
+ break;
+ }
+ }
+
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java
new file mode 100644
index 000000000..6206ddcf7
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java
@@ -0,0 +1,467 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc.h2;
+
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBooleanValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArray;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDecimalValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.sql.SQLException;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.List;
+
+import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
+import org.apache.arrow.adapter.jdbc.JdbcToArrow;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
+import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper;
+import org.apache.arrow.adapter.jdbc.Table;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class JdbcToArrowVectorIteratorTest extends JdbcToArrowTest {
+
+ /**
+ * Constructor which populates the table object for each test iteration.
+ *
+ * @param table Table object
+ * @param reuseVectorSchemaRoot A flag indicating if we should reuse vector schema roots.
+ */
+ public JdbcToArrowVectorIteratorTest(Table table, boolean reuseVectorSchemaRoot) {
+ super(table, reuseVectorSchemaRoot);
+ }
+
+ @Test
+ @Override
+ public void testJdbcToArrowValues() throws SQLException, IOException {
+
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance()).setTargetBatchSize(3).build();
+
+ ArrowVectorIterator iterator =
+ JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config);
+
+ validate(iterator);
+ }
+
+ @Test
+ public void testVectorSchemaRootReuse() throws SQLException, IOException {
+
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance()).setTargetBatchSize(3).setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build();
+
+ ArrowVectorIterator iterator =
+ JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config);
+
+ int batchCount = 0;
+ VectorSchemaRoot prev = null;
+ while (iterator.hasNext()) {
+ VectorSchemaRoot cur = iterator.next();
+ assertNotNull(cur);
+
+ if (prev != null) {
+ // skip the first iteration
+
+ if (reuseVectorSchemaRoot) {
+ // when reuse is enabled, different iterations are based on the same vector schema root.
+ assertTrue(prev == cur);
+ } else {
+ // when reuse is enabled, a new vector schema root is created in each iteration.
+ assertFalse(prev == cur);
+ cur.close();
+ }
+ }
+
+ prev = cur;
+ batchCount += 1;
+ }
+
+ // make sure we have at least two batches, so the above test paths are actually covered
+ assertTrue(batchCount > 1);
+ }
+
+ @Test
+ public void testJdbcToArrowValuesNoLimit() throws SQLException, IOException {
+
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance()).setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE).build();
+
+ ArrowVectorIterator iterator =
+ JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config);
+
+ validate(iterator);
+ }
+
+ @Test
+ public void testTimeStampConsumer() throws SQLException, IOException {
+ final String sql = "select timestamp_field11 from table1";
+
+ // first experiment, with calendar and time zone.
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance()).setTargetBatchSize(3).setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build();
+ assertNotNull(config.getCalendar());
+
+ try (ArrowVectorIterator iterator =
+ JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) {
+ VectorSchemaRoot root = iterator.next();
+ assertEquals(1, root.getFieldVectors().size());
+
+ // vector with time zone info.
+ assertTrue(root.getVector(0) instanceof TimeStampMilliTZVector);
+ }
+
+ // second experiment, without calendar and time zone.
+ config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE),
+ null).setTargetBatchSize(3).setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build();
+ assertNull(config.getCalendar());
+
+ try (ArrowVectorIterator iterator =
+ JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) {
+ VectorSchemaRoot root = iterator.next();
+ assertEquals(1, root.getFieldVectors().size());
+
+ // vector without time zone info.
+ assertTrue(root.getVector(0) instanceof TimeStampMilliVector);
+ }
+ }
+
+ private void validate(ArrowVectorIterator iterator) throws SQLException, IOException {
+
+ List<BigIntVector> bigIntVectors = new ArrayList<>();
+ List<TinyIntVector> tinyIntVectors = new ArrayList<>();
+ List<IntVector> intVectors = new ArrayList<>();
+ List<SmallIntVector> smallIntVectors = new ArrayList<>();
+ List<VarBinaryVector> vectorsForBinary = new ArrayList<>();
+ List<VarBinaryVector> vectorsForBlob = new ArrayList<>();
+ List<VarCharVector> vectorsForClob = new ArrayList<>();
+ List<VarCharVector> vectorsForVarChar = new ArrayList<>();
+ List<VarCharVector> vectorsForChar = new ArrayList<>();
+ List<BitVector> vectorsForBit = new ArrayList<>();
+ List<BitVector> vectorsForBool = new ArrayList<>();
+ List<DateDayVector> dateDayVectors = new ArrayList<>();
+ List<TimeMilliVector> timeMilliVectors = new ArrayList<>();
+ List<TimeStampVector> timeStampVectors = new ArrayList<>();
+ List<DecimalVector> decimalVectors = new ArrayList<>();
+ List<Float4Vector> float4Vectors = new ArrayList<>();
+ List<Float8Vector> float8Vectors = new ArrayList<>();
+
+ List<VectorSchemaRoot> roots = new ArrayList<>();
+ while (iterator.hasNext()) {
+ VectorSchemaRoot root = iterator.next();
+ roots.add(root);
+
+ JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root);
+
+ bigIntVectors.add((BigIntVector) root.getVector(BIGINT));
+ tinyIntVectors.add((TinyIntVector) root.getVector(TINYINT));
+ intVectors.add((IntVector) root.getVector(INT));
+ smallIntVectors.add((SmallIntVector) root.getVector(SMALLINT));
+ vectorsForBinary.add((VarBinaryVector) root.getVector(BINARY));
+ vectorsForBlob.add((VarBinaryVector) root.getVector(BLOB));
+ vectorsForClob.add((VarCharVector) root.getVector(CLOB));
+ vectorsForVarChar.add((VarCharVector) root.getVector(VARCHAR));
+ vectorsForChar.add((VarCharVector) root.getVector(CHAR));
+ vectorsForBit.add((BitVector) root.getVector(BIT));
+ vectorsForBool.add((BitVector) root.getVector(BOOL));
+ dateDayVectors.add((DateDayVector) root.getVector(DATE));
+ timeMilliVectors.add((TimeMilliVector) root.getVector(TIME));
+ timeStampVectors.add((TimeStampVector) root.getVector(TIMESTAMP));
+ decimalVectors.add((DecimalVector) root.getVector(DECIMAL));
+ float4Vectors.add((Float4Vector) root.getVector(REAL));
+ float8Vectors.add((Float8Vector) root.getVector(DOUBLE));
+
+ }
+ assertBigIntVectorValues(bigIntVectors, table.getRowCount(), getLongValues(table.getValues(), BIGINT));
+ assertTinyIntVectorValues(tinyIntVectors, table.getRowCount(), getIntValues(table.getValues(), TINYINT));
+ assertIntVectorValues(intVectors, table.getRowCount(), getIntValues(table.getValues(), INT));
+ assertSmallIntVectorValues(smallIntVectors, table.getRowCount(), getIntValues(table.getValues(), SMALLINT));
+ assertBinaryVectorValues(vectorsForBinary, table.getRowCount(), getBinaryValues(table.getValues(), BINARY));
+ assertBinaryVectorValues(vectorsForBlob, table.getRowCount(), getBinaryValues(table.getValues(), BLOB));
+ assertVarCharVectorValues(vectorsForClob, table.getRowCount(), getCharArray(table.getValues(), CLOB));
+ assertVarCharVectorValues(vectorsForVarChar, table.getRowCount(), getCharArray(table.getValues(), VARCHAR));
+ assertVarCharVectorValues(vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR));
+ assertBitVectorValues(vectorsForBit, table.getRowCount(), getIntValues(table.getValues(), BIT));
+ assertBooleanVectorValues(vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL));
+ assertDateDayVectorValues(dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE));
+ assertTimeMilliVectorValues(timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME));
+ assertTimeStampVectorValues(timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP));
+ assertDecimalVectorValues(decimalVectors, table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL));
+ assertFloat4VectorValues(float4Vectors, table.getRowCount(), getFloatValues(table.getValues(), REAL));
+ assertFloat8VectorValues(float8Vectors, table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE));
+
+ roots.forEach(root -> root.close());
+ }
+
+ private void assertFloat8VectorValues(List<Float8Vector> vectors, int rowCount, Double[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (Float8Vector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].doubleValue(), vector.get(i), 0.01);
+ }
+ }
+ }
+
+ private void assertFloat4VectorValues(List<Float4Vector> vectors, int rowCount, Float[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (Float4Vector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].floatValue(), vector.get(i), 0.01);
+ }
+ }
+ }
+
+ private void assertDecimalVectorValues(List<DecimalVector> vectors, int rowCount, BigDecimal[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (DecimalVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertNotNull(vector.getObject(i));
+ assertEquals(values[index++].doubleValue(), vector.getObject(i).doubleValue(), 0);
+ }
+ }
+ }
+
+ private void assertTimeStampVectorValues(List<TimeStampVector> vectors, int rowCount, Long[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (TimeStampVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].longValue(), vector.get(i));
+ }
+ }
+ }
+
+ private void assertTimeMilliVectorValues(List<TimeMilliVector> vectors, int rowCount, Long[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (TimeMilliVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].longValue(), vector.get(i));
+ }
+ }
+ }
+
+ private void assertDateDayVectorValues(List<DateDayVector> vectors, int rowCount, Long[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (DateDayVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].longValue(), vector.get(i));
+ }
+ }
+ }
+
+ private void assertBitVectorValues(List<BitVector> vectors, int rowCount, Integer[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (BitVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].intValue(), vector.get(i));
+ }
+ }
+ }
+
+ private void assertBooleanVectorValues(List<BitVector> vectors, int rowCount, Boolean[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (BitVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++], vector.get(i) == 1);
+ }
+ }
+ }
+
+ private void assertVarCharVectorValues(List<VarCharVector> vectors, int rowCount, byte[][] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (VarCharVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertArrayEquals(values[index++], vector.get(i));
+ }
+ }
+ }
+
+ private void assertBinaryVectorValues(List<VarBinaryVector> vectors, int rowCount, byte[][] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (VarBinaryVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertArrayEquals(values[index++], vector.get(i));
+ }
+ }
+ }
+
+ private void assertSmallIntVectorValues(List<SmallIntVector> vectors, int rowCount, Integer[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (SmallIntVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].intValue(), vector.get(i));
+ }
+ }
+ }
+
+ private void assertTinyIntVectorValues(List<TinyIntVector> vectors, int rowCount, Integer[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (TinyIntVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].intValue(), vector.get(i));
+ }
+ }
+ }
+
+ private void assertBigIntVectorValues(List<BigIntVector> vectors, int rowCount, Long[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (BigIntVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].longValue(), vector.get(i));
+ }
+ }
+ }
+
+ private void assertIntVectorValues(List<IntVector> vectors, int rowCount, Integer[] values) {
+ int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum();
+ assertEquals(rowCount, valueCount);
+
+ int index = 0;
+ for (IntVector vector : vectors) {
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(values[index++].intValue(), vector.get(i));
+ }
+ }
+ }
+
+ /**
+ * Runs a simple query, and encapsulates the result into a field vector.
+ */
+ private FieldVector getQueryResult(JdbcToArrowConfig config) throws SQLException, IOException {
+ ArrowVectorIterator iterator = JdbcToArrow.sqlToArrowVectorIterator(
+ conn.createStatement().executeQuery("select real_field8 from table1"), config);
+
+ VectorSchemaRoot root = iterator.next();
+
+ // only one vector, since there is one column in the select statement.
+ assertEquals(1, root.getFieldVectors().size());
+ FieldVector result = root.getVector(0);
+
+ // make sure some data is actually read
+ assertTrue(result.getValueCount() > 0);
+
+ return result;
+ }
+
+ @Test
+ public void testJdbcToArrowCustomTypeConversion() throws SQLException, IOException {
+ JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE),
+ Calendar.getInstance()).setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE)
+ .setReuseVectorSchemaRoot(reuseVectorSchemaRoot);
+
+ // first experiment, using default type converter
+ JdbcToArrowConfig config = builder.build();
+
+ try (FieldVector vector = getQueryResult(config)) {
+ // the default converter translates real to float4
+ assertTrue(vector instanceof Float4Vector);
+ }
+
+ // second experiment, using customized type converter
+ builder.setJdbcToArrowTypeConverter(fieldInfo -> {
+ switch (fieldInfo.getJdbcType()) {
+ case Types.REAL:
+ // this is different from the default type converter
+ return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
+ default:
+ return null;
+ }
+ });
+ config = builder.build();
+
+ try (FieldVector vector = getQueryResult(config)) {
+ // the customized converter translates real to float8
+ assertTrue(vector instanceof Float8Vector);
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml
new file mode 100644
index 000000000..9baae643a
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml
@@ -0,0 +1,121 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT,
+ decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP,
+ binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT,
+ null_field18 NULL);'
+
+data:
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);'
+
+query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8,
+ time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17, null_field18 from table1'
+
+drop: 'DROP table table1;'
+
+rowCount: '10'
+
+values:
+ - 'INT_FIELD1=101,101,101,101,101,101,101,101,101,101'
+ - 'BOOL_FIELD2=1,1,1,1,1,1,1,1,1,1'
+ - 'BIT_FIELD17=1,1,1,1,1,1,1,1,1,1'
+ - 'TINYINT_FIELD3=45,45,45,45,45,45,45,45,45,45'
+ - 'SMALLINT_FIELD4=12000,12000,12000,12000,12000,12000,12000,12000,12000,12000'
+ - 'BIGINT_FIELD5=92233720,92233720,92233720,92233720,92233720,92233720,92233720,92233720,92233720,92233720'
+ - 'REAL_FIELD8=56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f'
+ - 'DECIMAL_FIELD6=17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23'
+ - 'DOUBLE_FIELD7=56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345'
+ - 'TIME_FIELD9=45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000'
+ - 'DATE_FIELD10=17574,17574,17574,17574,17574,17574,17574,17574,17574,17574'
+ - 'TIMESTAMP_FIELD11=1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000'
+ - 'CHAR_FIELD16=some char text,some char text,some char text,some char text,some char text,
+ some char text,some char text,some char text,some char text,some char text'
+ - 'VARCHAR_FIELD13=some text that needs to be converted to varchar,some text that needs to be converted to varchar,
+ some text that needs to be converted to varchar,some text that needs to be converted to varchar,
+ some text that needs to be converted to varchar,some text that needs to be converted to varchar,
+ some text that needs to be converted to varchar,some text that needs to be converted to varchar,
+ some text that needs to be converted to varchar,some text that needs to be converted to varchar'
+ - 'BINARY_FIELD12=736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - 'BLOB_FIELD14=736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - 'CLOB_FIELD15=some text that needs to be converted to clob,some text that needs to be converted to clob,
+ some text that needs to be converted to clob,some text that needs to be converted to clob,
+ some text that needs to be converted to clob,some text that needs to be converted to clob,
+ some text that needs to be converted to clob,some text that needs to be converted to clob,
+ some text that needs to be converted to clob,some text that needs to be converted to clob'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml
new file mode 100644
index 000000000..977879df4
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml
@@ -0,0 +1,51 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'null'
+
+vectors:
+ - 'INT_FIELD1'
+ - 'BOOL_FIELD2'
+ - 'TINYINT_FIELD3'
+ - 'SMALLINT_FIELD4'
+ - 'BIGINT_FIELD5'
+ - 'DECIMAL_FIELD6'
+ - 'DOUBLE_FIELD7'
+ - 'REAL_FIELD8'
+ - 'TIME_FIELD9'
+ - 'DATE_FIELD10'
+ - 'TIMESTAMP_FIELD11'
+ - 'BINARY_FIELD12'
+ - 'VARCHAR_FIELD13'
+ - 'BLOB_FIELD14'
+ - 'CLOB_FIELD15'
+ - 'CHAR_FIELD16'
+ - 'BIT_FIELD17'
+
+rowCount: '5'
+
+create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT,
+ decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP,
+ binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT);'
+
+data:
+ - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);'
+ - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);'
+ - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);'
+ - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);'
+ - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);'
+
+query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8,
+ time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17 from table1'
+
+drop: 'DROP table table1;' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml
new file mode 100644
index 000000000..4be8ab86e
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml
@@ -0,0 +1,83 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'selected_null_row'
+
+vectors:
+ - 'INT_FIELD1'
+ - 'BOOL_FIELD2'
+ - 'TINYINT_FIELD3'
+ - 'SMALLINT_FIELD4'
+ - 'BIGINT_FIELD5'
+ - 'DECIMAL_FIELD6'
+ - 'DOUBLE_FIELD7'
+ - 'REAL_FIELD8'
+ - 'TIME_FIELD9'
+ - 'DATE_FIELD10'
+ - 'TIMESTAMP_FIELD11'
+ - 'BINARY_FIELD12'
+ - 'VARCHAR_FIELD13'
+ - 'BLOB_FIELD14'
+ - 'CLOB_FIELD15'
+ - 'CHAR_FIELD16'
+ - 'BIT_FIELD17'
+
+create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT,
+ decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP,
+ binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT);'
+
+data:
+ - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1);'
+
+ - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);'
+
+ - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''),
+ PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''),
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'',
+ ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1);'
+
+ - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);'
+
+query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8,
+ time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17 from table1'
+
+drop: 'DROP table table1;'
+
+rowCount: '5'
+
+values:
+ - 'INT_FIELD1=null,101,null,101,null'
+ - 'BOOL_FIELD2=null,1,null,1,null'
+ - 'BIT_FIELD17=null,1,null,1,null'
+ - 'TINYINT_FIELD3=null,45,null,45,null'
+ - 'SMALLINT_FIELD4=null,12000,null,12000,null'
+ - 'BIGINT_FIELD5=null,92233720,null,92233720,null'
+ - 'REAL_FIELD8=null,56478356785.345f,null,56478356785.345f,null'
+ - 'DECIMAL_FIELD6=null,17345667789.23,null,17345667789.23,null'
+ - 'DOUBLE_FIELD7=null,56478356785.345,null,56478356785.345,null'
+ - 'TIME_FIELD9=null,45935000,null,45935000,null'
+ - 'DATE_FIELD10=null,17574,null,17574,null'
+ - 'TIMESTAMP_FIELD11=null,1518439535000,null,1518439535000,null'
+ - 'CHAR_FIELD16=null,some char text,null,some char text,null'
+ - 'VARCHAR_FIELD13=null,some text that needs to be converted to varchar,null,
+ some text that needs to be converted to varchar,null'
+ - 'BINARY_FIELD12=null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,null'
+ - 'BLOB_FIELD14=null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,
+ null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,null'
+ - 'CLOB_FIELD15=null,some text that needs to be converted to clob,null,some text that needs to be converted to clob,null'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml
new file mode 100644
index 000000000..066bececf
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'big_int'
+
+vector: 'BIGINT_FIELD5'
+
+create: 'CREATE TABLE table1 (bigint_field5 BIGINT);'
+
+data:
+ - 'INSERT INTO table1 VALUES (92233720);'
+ - 'INSERT INTO table1 VALUES (92233720);'
+ - 'INSERT INTO table1 VALUES (92233720);'
+ - 'INSERT INTO table1 VALUES (92233720);'
+ - 'INSERT INTO table1 VALUES (92233720);'
+ - 'INSERT INTO table1 VALUES (92233720);'
+ - 'INSERT INTO table1 VALUES (92233720);'
+ - 'INSERT INTO table1 VALUES (92233720);'
+ - 'INSERT INTO table1 VALUES (92233720);'
+ - 'INSERT INTO table1 VALUES (92233720);'
+
+query: 'select bigint_field5 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '92233720'
+ - '92233720'
+ - '92233720'
+ - '92233720'
+ - '92233720'
+ - '92233720'
+ - '92233720'
+ - '92233720'
+ - '92233720'
+ - '92233720'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml
new file mode 100644
index 000000000..ce3e4f127
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'binary'
+
+vector: 'BINARY_FIELD12'
+
+create: 'CREATE TABLE table1 (binary_field12 BINARY(100));'
+
+data:
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+
+query: 'select binary_field12 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml
new file mode 100644
index 000000000..aeb7a20e0
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'bit'
+
+vector: 'BIT_FIELD17'
+
+create: 'CREATE TABLE table1 (bit_field17 BIT);'
+
+data:
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+
+query: 'select bit_field17 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '1'
+ - '1'
+ - '1'
+ - '1'
+ - '1'
+ - '1'
+ - '1'
+ - '1'
+ - '1'
+ - '1'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml
new file mode 100644
index 000000000..b4cd2ca80
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'blob'
+
+vector: 'BLOB_FIELD14'
+
+create: 'CREATE TABLE table1 (blob_field14 BLOB);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+ - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');'
+
+query: 'select blob_field14 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
+ - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml
new file mode 100644
index 000000000..8219a55ec
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'bool'
+
+vector: 'BOOL_FIELD2'
+
+create: 'CREATE TABLE table1 (bool_field2 BOOLEAN);'
+
+data:
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+ - 'INSERT INTO table1 VALUES (1);'
+
+query: 'select bool_field2 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - 'true'
+ - 'true'
+ - 'true'
+ - 'true'
+ - 'true'
+ - 'true'
+ - 'true'
+ - 'true'
+ - 'true'
+ - 'true'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml
new file mode 100644
index 000000000..6e2cb185c
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'char'
+
+vector: 'CHAR_FIELD16'
+
+create: 'CREATE TABLE table1 (char_field16 CHAR(16));'
+
+data:
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+ - 'INSERT INTO table1 VALUES (''some char text'');'
+
+query: 'select char_field16 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - 'some char text'
+ - 'some char text'
+ - 'some char text'
+ - 'some char text'
+ - 'some char text'
+ - 'some char text'
+ - 'some char text'
+ - 'some char text'
+ - 'some char text'
+ - 'some char text'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml
new file mode 100644
index 000000000..1b6d7d503
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml
@@ -0,0 +1,43 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'charset'
+
+vectors:
+ - 'VARCHAR_FIELD13'
+ - 'CHAR_FIELD16'
+ - 'CLOB_FIELD15'
+
+rowCount: '5'
+
+charSet: 'GBK'
+
+create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));'
+
+data:
+ - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');'
+ - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');'
+ - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');'
+ - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');'
+ - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');'
+
+query: 'select varchar_field13, clob_field15, char_field16 from table1'
+
+drop: 'DROP table table1;'
+
+values:
+ - 'VARCHAR_FIELD13=一些帶有char編碼的文本需要轉換為varchar,一些帶有char編碼的文本需要轉換為varchar,一些帶有char編碼的文本需要轉換為varchar,
+ 一些帶有char編碼的文本需要轉換為varchar,一些帶有char編碼的文本需要轉換為varchar'
+ - 'CLOB_FIELD15=一些带有char编码的文本需要转换为clob,一些带有char编码的文本需要转换为clob,一些带有char编码的文本需要转换为clob,
+ 一些带有char编码的文本需要转换为clob,一些带有char编码的文本需要转换为clob'
+ - 'CHAR_FIELD16=一些char编码的字符文本,一些char编码的字符文本,一些char编码的字符文本,一些char编码的字符文本,一些char编码的字符文本' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml
new file mode 100644
index 000000000..75734a221
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml
@@ -0,0 +1,53 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'charset'
+
+vectors:
+ - 'VARCHAR_FIELD13'
+ - 'CHAR_FIELD16'
+ - 'CLOB_FIELD15'
+
+rowCount: '10'
+
+create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));'
+
+data:
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+ - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');'
+
+query: 'select varchar_field13, clob_field15, char_field16 from table1'
+
+drop: 'DROP table table1;'
+
+values:
+ - 'VARCHAR_FIELD13=some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar,
+ some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar,
+ some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar,
+ some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar,
+ some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar'
+ - 'CLOB_FIELD15=some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob,
+ some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob,
+ some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob,
+ some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob,
+ some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob'
+ - 'CHAR_FIELD16=some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding,
+ some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml
new file mode 100644
index 000000000..10c33f443
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml
@@ -0,0 +1,43 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'charset'
+
+vectors:
+ - 'VARCHAR_FIELD13'
+ - 'CHAR_FIELD16'
+ - 'CLOB_FIELD15'
+
+rowCount: '5'
+
+charSet: 'SJIS'
+
+create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));'
+
+data:
+ - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');'
+ - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');'
+ - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');'
+ - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');'
+ - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');'
+
+query: 'select varchar_field13, clob_field15, char_field16 from table1'
+
+drop: 'DROP table table1;'
+
+values:
+ - 'VARCHAR_FIELD13=varcharに変換する必要があるcharエンコーディングのテキスト,varcharに変換する必要があるcharエンコーディングのテキスト,varcharに変換する必要があるcharエンコーディングのテキスト,
+ varcharに変換する必要があるcharエンコーディングのテキスト,varcharに変換する必要があるcharエンコーディングのテキスト'
+ - 'CLOB_FIELD15=charエンコーディングのあるテキストをclobに変換する必要がある,charエンコーディングのあるテキストをclobに変換する必要がある,charエンコーディングのあるテキストをclobに変換する必要がある,
+ charエンコーディングのあるテキストをclobに変換する必要がある,charエンコーディングのあるテキストをclobに変換する必要がある'
+ - 'CHAR_FIELD16=charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml
new file mode 100644
index 000000000..a00a41b53
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml
@@ -0,0 +1,43 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'charset'
+
+vectors:
+ - 'VARCHAR_FIELD13'
+ - 'CHAR_FIELD16'
+ - 'CLOB_FIELD15'
+
+rowCount: '5'
+
+charSet: 'EUC-KR'
+
+create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));'
+
+data:
+ - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');'
+ - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');'
+ - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');'
+ - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');'
+ - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');'
+
+query: 'select varchar_field13, clob_field15, char_field16 from table1'
+
+drop: 'DROP table table1;'
+
+values:
+ - 'VARCHAR_FIELD13=char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.,char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.,char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.,
+ char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.,char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'
+ - 'CLOB_FIELD15=clob로 변환해야하는 char 인코딩을 가진 텍스트,clob로 변환해야하는 char 인코딩을 가진 텍스트,clob로 변환해야하는 char 인코딩을 가진 텍스트,
+ clob로 변환해야하는 char 인코딩을 가진 텍스트,clob로 변환해야하는 char 인코딩을 가진 텍스트'
+ - 'CHAR_FIELD16=char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml
new file mode 100644
index 000000000..57c69ffe0
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'clob'
+
+vector: 'CLOB_FIELD15'
+
+create: 'CREATE TABLE table1 (clob_field15 CLOB);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');'
+
+query: 'select CLOB_FIELD15 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - 'some text that needs to be converted to clob'
+ - 'some text that needs to be converted to clob'
+ - 'some text that needs to be converted to clob'
+ - 'some text that needs to be converted to clob'
+ - 'some text that needs to be converted to clob'
+ - 'some text that needs to be converted to clob'
+ - 'some text that needs to be converted to clob'
+ - 'some text that needs to be converted to clob'
+ - 'some text that needs to be converted to clob'
+ - 'some text that needs to be converted to clob'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml
new file mode 100644
index 000000000..bca886ceb
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml
@@ -0,0 +1,48 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'date'
+
+vector: 'DATE_FIELD10'
+
+create: 'CREATE TABLE table1 (date_field10 DATE);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''1969-01-01'');'
+
+query: 'select date_field10 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '-365'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml
new file mode 100644
index 000000000..3ee15c409
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'decimal'
+
+vector: 'DECIMAL_FIELD6'
+
+create: 'CREATE TABLE table1 (decimal_field6 DECIMAL(20,2));'
+
+data:
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+ - 'INSERT INTO table1 VALUES (17345667789.23);'
+
+query: 'select decimal_field6 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '17345667789.23'
+ - '17345667789.23'
+ - '17345667789.23'
+ - '17345667789.23'
+ - '17345667789.23'
+ - '17345667789.23'
+ - '17345667789.23'
+ - '17345667789.23'
+ - '17345667789.23'
+ - '17345667789.23'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml
new file mode 100644
index 000000000..f41900928
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'double'
+
+vector: 'DOUBLE_FIELD7'
+
+create: 'CREATE TABLE table1 (double_field7 DOUBLE);'
+
+data:
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+
+query: 'select double_field7 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '56478356785.345'
+ - '56478356785.345'
+ - '56478356785.345'
+ - '56478356785.345'
+ - '56478356785.345'
+ - '56478356785.345'
+ - '56478356785.345'
+ - '56478356785.345'
+ - '56478356785.345'
+ - '56478356785.345'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml
new file mode 100644
index 000000000..1868db3ad
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml
@@ -0,0 +1,48 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'est_date'
+
+timezone: 'EST'
+
+vector: 'DATE_FIELD10'
+
+create: 'CREATE TABLE table1 (date_field10 DATE);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+
+query: 'select date_field10 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml
new file mode 100644
index 000000000..c6fc7a180
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml
@@ -0,0 +1,48 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'est_time'
+
+timezone: 'EST'
+
+vector: 'TIME_FIELD9'
+
+create: 'CREATE TABLE table1 (time_field9 TIME);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+
+query: 'select time_field9 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '63935000'
+ - '63935000'
+ - '63935000'
+ - '63935000'
+ - '63935000'
+ - '63935000'
+ - '63935000'
+ - '63935000'
+ - '63935000'
+ - '63935000'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml
new file mode 100644
index 000000000..b0ec5b708
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml
@@ -0,0 +1,49 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'est_timestamp'
+
+timezone: 'EST'
+
+vector: 'TIMESTAMP_FIELD11'
+
+create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+
+query: 'select timestamp_field11 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '1518457535000'
+ - '1518457535000'
+ - '1518457535000'
+ - '1518457535000'
+ - '1518457535000'
+ - '1518457535000'
+ - '1518457535000'
+ - '1518457535000'
+ - '1518457535000'
+ - '1518457535000'
+ \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml
new file mode 100644
index 000000000..65824861a
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml
@@ -0,0 +1,48 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'gmt_date'
+
+timezone: 'GMT'
+
+vector: 'DATE_FIELD10'
+
+create: 'CREATE TABLE table1 (date_field10 DATE);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+
+query: 'select date_field10 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml
new file mode 100644
index 000000000..ae28c51e2
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml
@@ -0,0 +1,48 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'gmt_time'
+
+timezone: 'GMT'
+
+vector: 'TIME_FIELD9'
+
+create: 'CREATE TABLE table1 (time_field9 TIME);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+
+query: 'select time_field9 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml
new file mode 100644
index 000000000..b468f5af9
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml
@@ -0,0 +1,48 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'gmt_timestamp'
+
+timezone: 'GMT'
+
+vector: 'TIMESTAMP_FIELD11'
+
+create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+
+query: 'select timestamp_field11 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml
new file mode 100644
index 000000000..8b6e776cb
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'int'
+
+vector: 'INT_FIELD1'
+
+create: 'CREATE TABLE table1 (int_field1 INT);'
+
+data:
+ - 'INSERT INTO table1 VALUES (101);'
+ - 'INSERT INTO table1 VALUES (101);'
+ - 'INSERT INTO table1 VALUES (101);'
+ - 'INSERT INTO table1 VALUES (101);'
+ - 'INSERT INTO table1 VALUES (101);'
+ - 'INSERT INTO table1 VALUES (101);'
+ - 'INSERT INTO table1 VALUES (101);'
+ - 'INSERT INTO table1 VALUES (101);'
+ - 'INSERT INTO table1 VALUES (101);'
+ - 'INSERT INTO table1 VALUES (101);'
+
+values:
+ - '101'
+ - '101'
+ - '101'
+ - '101'
+ - '101'
+ - '101'
+ - '101'
+ - '101'
+ - '101'
+ - '101'
+
+query: 'select int_field1 from table1;'
+
+drop: 'DROP table table1;'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml
new file mode 100644
index 000000000..20e9e5e5e
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml
@@ -0,0 +1,26 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'nullnotnull'
+
+create: 'CREATE TABLE table1 (int_field_null INT, int_field_notnull INT NOT NULL);'
+
+data:
+ - 'INSERT INTO table1 VALUES (0, 0);'
+ - 'INSERT INTO table1 VALUES (1, 1);'
+
+rowCount: '2'
+
+query: 'select int_field_null, int_field_notnull from table1;'
+
+drop: 'DROP table table1;' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml
new file mode 100644
index 000000000..37a9883f9
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml
@@ -0,0 +1,36 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'null'
+
+vector: 'NULL_FIELD18'
+
+create: 'CREATE TABLE table1 (null_field18 NULL);'
+
+rowCount: '10'
+
+data:
+ - 'INSERT INTO table1 VALUES (null);'
+ - 'INSERT INTO table1 VALUES (null);'
+ - 'INSERT INTO table1 VALUES (null);'
+ - 'INSERT INTO table1 VALUES (null);'
+ - 'INSERT INTO table1 VALUES (null);'
+ - 'INSERT INTO table1 VALUES (null);'
+ - 'INSERT INTO table1 VALUES (null);'
+ - 'INSERT INTO table1 VALUES (null);'
+ - 'INSERT INTO table1 VALUES (null);'
+ - 'INSERT INTO table1 VALUES (null);'
+
+query: 'select null_field18 from table1;'
+
+drop: 'DROP table table1;'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml
new file mode 100644
index 000000000..798cfc7d6
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml
@@ -0,0 +1,48 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'pst_date'
+
+timezone: 'PST'
+
+vector: 'DATE_FIELD10'
+
+create: 'CREATE TABLE table1 (date_field10 DATE);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12'');'
+
+query: 'select date_field10 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574'
+ - '17574' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml
new file mode 100644
index 000000000..058d54d20
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml
@@ -0,0 +1,48 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'pst_time'
+
+timezone: 'PST'
+
+vector: 'TIME_FIELD9'
+
+create: 'CREATE TABLE table1 (time_field9 TIME);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+
+query: 'select time_field9 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '74735000'
+ - '74735000'
+ - '74735000'
+ - '74735000'
+ - '74735000'
+ - '74735000'
+ - '74735000'
+ - '74735000'
+ - '74735000'
+ - '74735000' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml
new file mode 100644
index 000000000..19b6b5f44
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml
@@ -0,0 +1,48 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'pst_timestamp'
+
+timezone: 'PST'
+
+vector: 'TIMESTAMP_FIELD11'
+
+create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+
+query: 'select timestamp_field11 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '1518468335000'
+ - '1518468335000'
+ - '1518468335000'
+ - '1518468335000'
+ - '1518468335000'
+ - '1518468335000'
+ - '1518468335000'
+ - '1518468335000'
+ - '1518468335000'
+ - '1518468335000' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml
new file mode 100644
index 000000000..c8f8aeb78
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'real'
+
+vector: 'REAL_FIELD8'
+
+create: 'CREATE TABLE table1 (real_field8 REAL);'
+
+data:
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+ - 'INSERT INTO table1 VALUES (56478356785.345);'
+
+query: 'select real_field8 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '56478356785.345f'
+ - '56478356785.345f'
+ - '56478356785.345f'
+ - '56478356785.345f'
+ - '56478356785.345f'
+ - '56478356785.345f'
+ - '56478356785.345f'
+ - '56478356785.345f'
+ - '56478356785.345f'
+ - '56478356785.345f'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml
new file mode 100644
index 000000000..93b1aae55
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'selected_null_column'
+
+vectors:
+ - 'BIGINT_FIELD5'
+ - 'DECIMAL_FIELD6'
+ - 'DOUBLE_FIELD7'
+ - 'REAL_FIELD8'
+ - 'TIME_FIELD9'
+ - 'DATE_FIELD10'
+ - 'TIMESTAMP_FIELD11'
+ - 'BINARY_FIELD12'
+ - 'VARCHAR_FIELD13'
+ - 'BLOB_FIELD14'
+ - 'CLOB_FIELD15'
+ - 'CHAR_FIELD16'
+ - 'BIT_FIELD17'
+
+rowCount: '5'
+
+create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT,
+ decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP,
+ binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT);'
+
+data:
+ - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);'
+ - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);'
+ - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);'
+ - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);'
+ - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);'
+
+query: 'select bigint_field5, decimal_field6, double_field7, real_field8, time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17 from table1'
+
+drop: 'DROP table table1;' \ No newline at end of file
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml
new file mode 100644
index 000000000..887c74f4d
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'small_int'
+
+vector: 'SMALLINT_FIELD4'
+
+create: 'CREATE TABLE table1 (smallint_field4 SMALLINT);'
+
+data:
+ - 'INSERT INTO table1 VALUES (12000);'
+ - 'INSERT INTO table1 VALUES (12000);'
+ - 'INSERT INTO table1 VALUES (12000);'
+ - 'INSERT INTO table1 VALUES (12000);'
+ - 'INSERT INTO table1 VALUES (12000);'
+ - 'INSERT INTO table1 VALUES (12000);'
+ - 'INSERT INTO table1 VALUES (12000);'
+ - 'INSERT INTO table1 VALUES (12000);'
+ - 'INSERT INTO table1 VALUES (12000);'
+ - 'INSERT INTO table1 VALUES (12000);'
+
+query: 'select smallint_field4 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '12000'
+ - '12000'
+ - '12000'
+ - '12000'
+ - '12000'
+ - '12000'
+ - '12000'
+ - '12000'
+ - '12000'
+ - '12000'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml
new file mode 100644
index 000000000..c9baaee8d
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'time'
+
+vector: 'TIME_FIELD9'
+
+create: 'CREATE TABLE table1 (time_field9 TIME);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''12:45:35'');'
+
+query: 'select time_field9 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
+ - '45935000'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml
new file mode 100644
index 000000000..7d93faad1
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'timestamp'
+
+vector: 'TIMESTAMP_FIELD11'
+
+create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);'
+
+data:
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+ - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');'
+
+query: 'select timestamp_field11 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
+ - '1518439535000'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml
new file mode 100644
index 000000000..a419416c8
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'tinyint'
+
+vector: 'TINYINT_FIELD3'
+
+create: 'CREATE TABLE table1 (tinyint_field3 TINYINT);'
+
+data:
+ - 'INSERT INTO table1 VALUES (45);'
+ - 'INSERT INTO table1 VALUES (45);'
+ - 'INSERT INTO table1 VALUES (45);'
+ - 'INSERT INTO table1 VALUES (45);'
+ - 'INSERT INTO table1 VALUES (45);'
+ - 'INSERT INTO table1 VALUES (45);'
+ - 'INSERT INTO table1 VALUES (45);'
+ - 'INSERT INTO table1 VALUES (45);'
+ - 'INSERT INTO table1 VALUES (45);'
+ - 'INSERT INTO table1 VALUES (45);'
+
+query: 'select tinyint_field3 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - '45'
+ - '45'
+ - '45'
+ - '45'
+ - '45'
+ - '45'
+ - '45'
+ - '45'
+ - '45'
+ - '45'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml
new file mode 100644
index 000000000..0bd142178
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml
@@ -0,0 +1,46 @@
+#Licensed to the Apache Software Foundation (ASF) under one or more contributor
+#license agreements. See the NOTICE file distributed with this work for additional
+#information regarding copyright ownership. The ASF licenses this file to
+#You under the Apache License, Version 2.0 (the "License"); you may not use
+#this file except in compliance with the License. You may obtain a copy of
+#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+#by applicable law or agreed to in writing, software distributed under the
+#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+#OF ANY KIND, either express or implied. See the License for the specific
+#language governing permissions and limitations under the License.
+
+name: 'table1'
+
+type: 'varchar'
+
+vector: 'VARCHAR_FIELD13'
+
+create: 'CREATE TABLE table1 (varchar_field13 VARCHAR(256));'
+
+data:
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+ - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');'
+
+query: 'select varchar_field13 from table1;'
+
+drop: 'DROP table table1;'
+
+values:
+ - 'some text that needs to be converted to varchar'
+ - 'some text that needs to be converted to varchar'
+ - 'some text that needs to be converted to varchar'
+ - 'some text that needs to be converted to varchar'
+ - 'some text that needs to be converted to varchar'
+ - 'some text that needs to be converted to varchar'
+ - 'some text that needs to be converted to varchar'
+ - 'some text that needs to be converted to varchar'
+ - 'some text that needs to be converted to varchar'
+ - 'some text that needs to be converted to varchar'
diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/logback.xml b/src/arrow/java/adapter/jdbc/src/test/resources/logback.xml
new file mode 100644
index 000000000..4c54d18a2
--- /dev/null
+++ b/src/arrow/java/adapter/jdbc/src/test/resources/logback.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<configuration>
+ <statusListener class="ch.qos.logback.core.status.NopStatusListener"/>
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <!-- encoders are assigned the type
+ ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+ </encoder>
+ </appender>
+
+ <logger name="org.apache.arrow" additivity="false">
+ <level value="info" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+</configuration>
diff --git a/src/arrow/java/adapter/orc/CMakeLists.txt b/src/arrow/java/adapter/orc/CMakeLists.txt
new file mode 100644
index 000000000..e2d4655d7
--- /dev/null
+++ b/src/arrow/java/adapter/orc/CMakeLists.txt
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_orc_java
+#
+
+# Headers: top level
+
+project(arrow_orc_java)
+
+# Find java/jni
+include(FindJava)
+include(UseJava)
+include(FindJNI)
+
+message("generating headers to ${JNI_HEADERS_DIR}")
+
+add_jar(arrow_orc_java
+ src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java
+ src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java
+ src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java
+ src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java
+ src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java
+ src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java
+ GENERATE_NATIVE_HEADERS
+ arrow_orc_java-native
+ DESTINATION
+ ${JNI_HEADERS_DIR})
diff --git a/src/arrow/java/adapter/orc/pom.xml b/src/arrow/java/adapter/orc/pom.xml
new file mode 100644
index 000000000..26f5f0c28
--- /dev/null
+++ b/src/arrow/java/adapter/orc/pom.xml
@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.orc</groupId>
+ <artifactId>orc-core</artifactId>
+ <version>1.7.0</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>2.2.0</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-storage-api</artifactId>
+ <version>2.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ <relativePath>../../pom.xml</relativePath>
+ </parent>
+
+ <groupId>org.apache.arrow.orc</groupId>
+ <artifactId>arrow-orc</artifactId>
+ <name>Arrow Orc Adapter</name>
+ <description>(Experimental/Contrib)A JNI wrapper for the C++ ORC reader implementation.</description>
+ <packaging>jar</packaging>
+ <properties>
+ <arrow.cpp.build.dir>../../../cpp/release-build/</arrow.cpp.build.dir>
+ </properties>
+
+ <build>
+ <resources>
+ <resource>
+ <directory>${arrow.cpp.build.dir}</directory>
+ <includes>
+ <include>**/libarrow_orc_jni.*</include>
+ </includes>
+ </resource>
+ </resources>
+ </build>
+</project>
diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java
new file mode 100644
index 000000000..716a13876
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+/**
+ * Metadata about Vectors/Arrays that is passed via JNI interface.
+ */
+class OrcFieldNode {
+
+ private final int length;
+ private final int nullCount;
+
+ /**
+ * Construct a new instance.
+ * @param length the number of values written.
+ * @param nullCount the number of null values.
+ */
+ public OrcFieldNode(int length, int nullCount) {
+ this.length = length;
+ this.nullCount = nullCount;
+ }
+
+ int getLength() {
+ return length;
+ }
+
+ int getNullCount() {
+ return nullCount;
+ }
+}
diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java
new file mode 100644
index 000000000..600569be7
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+
+/**
+ * Helper class for JNI related operations.
+ */
+class OrcJniUtils {
+ private static final String LIBRARY_NAME = "arrow_orc_jni";
+ private static boolean isLoaded = false;
+
+ private OrcJniUtils() {}
+
+ static void loadOrcAdapterLibraryFromJar()
+ throws IOException, IllegalAccessException {
+ synchronized (OrcJniUtils.class) {
+ if (!isLoaded) {
+ final String libraryToLoad = System.mapLibraryName(LIBRARY_NAME);
+ final File libraryFile = moveFileFromJarToTemp(
+ System.getProperty("java.io.tmpdir"), libraryToLoad);
+ System.load(libraryFile.getAbsolutePath());
+ isLoaded = true;
+ }
+ }
+ }
+
+ private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad)
+ throws IOException {
+ final File temp = File.createTempFile(tmpDir, libraryToLoad);
+ try (final InputStream is = OrcReaderJniWrapper.class.getClassLoader()
+ .getResourceAsStream(libraryToLoad)) {
+ if (is == null) {
+ throw new FileNotFoundException(libraryToLoad);
+ } else {
+ Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ }
+ }
+ return temp;
+ }
+}
diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java
new file mode 100644
index 000000000..473e83142
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+/**
+ * Wrapper for orc memory allocated by native code.
+ */
+class OrcMemoryJniWrapper implements AutoCloseable {
+
+ private final long nativeInstanceId;
+
+ private final long memoryAddress;
+
+ private final long size;
+
+ private final long capacity;
+
+ /**
+ * Construct a new instance.
+ * @param nativeInstanceId unique id of the underlying memory.
+ * @param memoryAddress starting memory address of the underlying memory.
+ * @param size size of the valid data.
+ * @param capacity allocated memory size.
+ */
+ OrcMemoryJniWrapper(long nativeInstanceId, long memoryAddress, long size, long capacity) {
+ this.nativeInstanceId = nativeInstanceId;
+ this.memoryAddress = memoryAddress;
+ this.size = size;
+ this.capacity = capacity;
+ }
+
+ /**
+ * Return the size of underlying chunk of memory that has valid data.
+ * @return valid data size
+ */
+ long getSize() {
+ return size;
+ }
+
+ /**
+ * Return the size of underlying chunk of memory managed by this OrcMemoryJniWrapper.
+ * @return underlying memory size
+ */
+ long getCapacity() {
+ return capacity;
+ }
+
+ /**
+ * Return the memory address of underlying chunk of memory.
+ * @return memory address
+ */
+ long getMemoryAddress() {
+ return memoryAddress;
+ }
+
+ @Override
+ public void close() {
+ release(nativeInstanceId);
+ }
+
+ private native void release(long id);
+}
diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java
new file mode 100644
index 000000000..b42ddb484
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.ArrowReader;
+
+/**
+ * Orc Reader that allow accessing orc stripes in Orc file.
+ * This orc reader basically acts like an ArrowReader iterator that
+ * iterate over orc stripes. Each stripe will be accessed via an
+ * ArrowReader.
+ */
+public class OrcReader implements AutoCloseable {
+ private final OrcReaderJniWrapper jniWrapper;
+ private BufferAllocator allocator;
+
+ /**
+ * reference to native reader instance.
+ */
+ private final long nativeInstanceId;
+
+ /**
+ * Create an OrcReader that iterate over orc stripes.
+ * @param filePath file path to target file, currently only support local file.
+ * @param allocator allocator provided to ArrowReader.
+ * @throws IOException throws exception in case of file not found
+ */
+ public OrcReader(String filePath, BufferAllocator allocator) throws IOException, IllegalAccessException {
+ this.allocator = allocator;
+ this.jniWrapper = OrcReaderJniWrapper.getInstance();
+ this.nativeInstanceId = jniWrapper.open(filePath);
+ }
+
+ /**
+ * Seek to designated row. Invoke NextStripeReader() after seek
+ * will return stripe reader starting from designated row.
+ * @param rowNumber the rows number to seek
+ * @return true if seek operation is succeeded
+ */
+ public boolean seek(int rowNumber) throws IllegalArgumentException {
+ return jniWrapper.seek(nativeInstanceId, rowNumber);
+ }
+
+ /**
+ * Get a stripe level ArrowReader with specified batchSize in each record batch.
+ *
+ * @param batchSize the number of rows loaded on each iteration
+ * @return ArrowReader that iterate over current stripes
+ */
+ public ArrowReader nextStripeReader(long batchSize) throws IllegalArgumentException {
+ long stripeReaderId = jniWrapper.nextStripeReader(nativeInstanceId, batchSize);
+ if (stripeReaderId < 0) {
+ return null;
+ }
+
+ return new OrcStripeReader(stripeReaderId, allocator);
+ }
+
+ /**
+ * The number of stripes in the file.
+ *
+ * @return number of stripes
+ */
+ public int getNumberOfStripes() throws IllegalArgumentException {
+ return jniWrapper.getNumberOfStripes(nativeInstanceId);
+ }
+
+ @Override
+ public void close() throws Exception {
+ jniWrapper.close(nativeInstanceId);
+ }
+}
diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java
new file mode 100644
index 000000000..ff449c343
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+import java.io.IOException;
+
+/**
+ * JNI wrapper for Orc reader.
+ */
+class OrcReaderJniWrapper {
+
+ private static volatile OrcReaderJniWrapper INSTANCE;
+
+ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessException {
+ if (INSTANCE == null) {
+ synchronized (OrcReaderJniWrapper.class) {
+ if (INSTANCE == null) {
+ OrcJniUtils.loadOrcAdapterLibraryFromJar();
+ INSTANCE = new OrcReaderJniWrapper();
+ }
+ }
+ }
+
+ return INSTANCE;
+ }
+
+ /**
+ * Construct a orc file reader over the target file.
+ * @param fileName absolute file path of target file
+ * @return id of the orc reader instance if file opened successfully,
+ * otherwise return error code * -1.
+ */
+ native long open(String fileName);
+
+ /**
+ * Release resources associated with designated reader instance.
+ * @param readerId id of the reader instance.
+ */
+ native void close(long readerId);
+
+ /**
+ * Seek to designated row. Invoke nextStripeReader() after seek
+ * will return id of stripe reader starting from designated row.
+ * @param readerId id of the reader instance
+ * @param rowNumber the rows number to seek
+ * @return true if seek operation is succeeded
+ */
+ native boolean seek(long readerId, int rowNumber);
+
+ /**
+ * The number of stripes in the file.
+ * @param readerId id of the reader instance
+ * @return number of stripes
+ */
+ native int getNumberOfStripes(long readerId);
+
+ /**
+ * Get a stripe level ArrowReader with specified batchSize in each record batch.
+ * @param readerId id of the reader instance
+ * @param batchSize the number of rows loaded on each iteration
+ * @return id of the stripe reader instance.
+ */
+ native long nextStripeReader(long readerId, long batchSize);
+}
diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java
new file mode 100644
index 000000000..a006cacab
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Wrapper for record batch meta and native memory.
+ */
+class OrcRecordBatch {
+ final int length;
+
+ /**
+ * Nodes correspond to the pre-ordered flattened logical schema.
+ */
+ final List<OrcFieldNode> nodes;
+
+ final List<OrcMemoryJniWrapper> buffers;
+
+ /**
+ * Construct a new instance.
+ * @param length number of records included in current batch
+ * @param nodes meta data for each fields
+ * @param buffers buffers for underlying data
+ */
+ OrcRecordBatch(int length, OrcFieldNode[] nodes, OrcMemoryJniWrapper[] buffers) {
+ this.length = length;
+ this.nodes = Arrays.asList(nodes);
+ this.buffers = Arrays.asList(buffers);
+ }
+}
diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java
new file mode 100644
index 000000000..fdec337e8
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OwnershipTransferResult;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * A simple reference manager implementation for memory allocated by native code.
+ * The underlying memory will be released when reference count reach zero.
+ */
+public class OrcReferenceManager implements ReferenceManager {
+ private final AtomicInteger bufRefCnt = new AtomicInteger(0);
+
+ private OrcMemoryJniWrapper memory;
+
+ OrcReferenceManager(OrcMemoryJniWrapper memory) {
+ this.memory = memory;
+ }
+
+ @Override
+ public int getRefCount() {
+ return bufRefCnt.get();
+ }
+
+ @Override
+ public boolean release() {
+ return release(1);
+ }
+
+ @Override
+ public boolean release(int decrement) {
+ Preconditions.checkState(decrement >= 1,
+ "ref count decrement should be greater than or equal to 1");
+ // decrement the ref count
+ final int refCnt;
+ synchronized (this) {
+ refCnt = bufRefCnt.addAndGet(-decrement);
+ if (refCnt == 0) {
+ // refcount of this reference manager has dropped to 0
+ // release the underlying memory
+ memory.close();
+ }
+ }
+ // the new ref count should be >= 0
+ Preconditions.checkState(refCnt >= 0, "RefCnt has gone negative");
+ return refCnt == 0;
+ }
+
+ @Override
+ public void retain() {
+ retain(1);
+ }
+
+ @Override
+ public void retain(int increment) {
+ Preconditions.checkArgument(increment > 0, "retain(%s) argument is not positive", increment);
+ bufRefCnt.addAndGet(increment);
+ }
+
+ @Override
+ public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) {
+ retain();
+ return srcBuffer;
+ }
+
+ @Override
+ public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) {
+ final long derivedBufferAddress = sourceBuffer.memoryAddress() + index;
+
+ // create new ArrowBuf
+ final ArrowBuf derivedBuf = new ArrowBuf(
+ this,
+ null,
+ length, // length (in bytes) in the underlying memory chunk for this new ArrowBuf
+ derivedBufferAddress // starting byte address in the underlying memory for this new ArrowBuf,
+ );
+
+ return derivedBuf;
+ }
+
+ @Override
+ public OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public BufferAllocator getAllocator() {
+ return null;
+ }
+
+ @Override
+ public long getSize() {
+ return memory.getSize();
+ }
+
+ @Override
+ public long getAccountedSize() {
+ return 0;
+ }
+}
diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java
new file mode 100644
index 000000000..484296d92
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.ipc.ReadChannel;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.MessageChannelReader;
+import org.apache.arrow.vector.ipc.message.MessageResult;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+
+/**
+ * Orc stripe that load data into ArrowRecordBatch.
+ */
+public class OrcStripeReader extends ArrowReader {
+ /**
+ * reference to native stripe reader instance.
+ */
+ private final long nativeInstanceId;
+
+ /**
+ * Construct a new instance.
+ * @param nativeInstanceId nativeInstanceId of the stripe reader instance, obtained by
+ * calling nextStripeReader from OrcReaderJniWrapper
+ * @param allocator memory allocator for accounting.
+ */
+ OrcStripeReader(long nativeInstanceId, BufferAllocator allocator) {
+ super(allocator);
+ this.nativeInstanceId = nativeInstanceId;
+ }
+
+ @Override
+ public boolean loadNextBatch() throws IOException {
+ OrcRecordBatch recordBatch = OrcStripeReaderJniWrapper.next(nativeInstanceId);
+ if (recordBatch == null) {
+ return false;
+ }
+
+ ArrayList<ArrowBuf> buffers = new ArrayList<>();
+ for (OrcMemoryJniWrapper buffer : recordBatch.buffers) {
+ buffers.add(new ArrowBuf(
+ new OrcReferenceManager(buffer),
+ null,
+ (int) buffer.getSize(),
+ buffer.getMemoryAddress()));
+ }
+
+ loadRecordBatch(new ArrowRecordBatch(
+ recordBatch.length,
+ recordBatch.nodes.stream()
+ .map(buf -> new ArrowFieldNode(buf.getLength(), buf.getNullCount()))
+ .collect(Collectors.toList()),
+ buffers));
+ return true;
+ }
+
+ @Override
+ public long bytesRead() {
+ return 0;
+ }
+
+
+ @Override
+ protected void closeReadSource() throws IOException {
+ OrcStripeReaderJniWrapper.close(nativeInstanceId);
+ }
+
+ @Override
+ protected Schema readSchema() throws IOException {
+ byte[] schemaBytes = OrcStripeReaderJniWrapper.getSchema(nativeInstanceId);
+
+ try (MessageChannelReader schemaReader =
+ new MessageChannelReader(
+ new ReadChannel(
+ new ByteArrayReadableSeekableByteChannel(schemaBytes)), allocator)) {
+
+ MessageResult result = schemaReader.readNext();
+ if (result == null) {
+ throw new IOException("Unexpected end of input. Missing schema.");
+ }
+
+ return MessageSerializer.deserializeSchema(result.getMessage());
+ }
+ }
+}
diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java
new file mode 100644
index 000000000..1dd969861
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+/**
+ * JNI wrapper for orc stripe reader.
+ */
+class OrcStripeReaderJniWrapper {
+
+ /**
+ * Get the schema of current stripe.
+ * @param readerId id of the stripe reader instance.
+ * @return serialized schema.
+ */
+ static native byte[] getSchema(long readerId);
+
+ /**
+ * Load next record batch.
+ * @param readerId id of the stripe reader instance.
+ * @return loaded record batch, return null when reached
+ * the end of current stripe.
+ */
+ static native OrcRecordBatch next(long readerId);
+
+ /**
+ * Release resources of underlying reader.
+ * @param readerId id of the stripe reader instance.
+ */
+ static native void close(long readerId);
+}
diff --git a/src/arrow/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java b/src/arrow/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java
new file mode 100644
index 000000000..4153a35a6
--- /dev/null
+++ b/src/arrow/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.orc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+
+import java.io.File;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.OrcFile;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+
+public class OrcReaderTest {
+
+ @Rule
+ public TemporaryFolder testFolder = new TemporaryFolder();
+
+ private static final int MAX_ALLOCATION = 8 * 1024;
+ private static RootAllocator allocator;
+
+ @BeforeClass
+ public static void beforeClass() {
+ allocator = new RootAllocator(MAX_ALLOCATION);
+ }
+
+ @Test
+ public void testOrcJniReader() throws Exception {
+ TypeDescription schema = TypeDescription.fromString("struct<x:int,y:string>");
+ File testFile = new File(testFolder.getRoot(), "test-orc");
+
+ Writer writer = OrcFile.createWriter(new Path(testFile.getAbsolutePath()),
+ OrcFile.writerOptions(new Configuration()).setSchema(schema));
+ VectorizedRowBatch batch = schema.createRowBatch();
+ LongColumnVector longColumnVector = (LongColumnVector) batch.cols[0];
+ BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[1];
+ for (int r = 0; r < 1024; ++r) {
+ int row = batch.size++;
+ longColumnVector.vector[row] = r;
+ byte[] buffer = ("Last-" + (r * 3)).getBytes(StandardCharsets.UTF_8);
+ bytesColumnVector.setRef(row, buffer, 0, buffer.length);
+ }
+ writer.addRowBatch(batch);
+ writer.close();
+
+ OrcReader reader = new OrcReader(testFile.getAbsolutePath(), allocator);
+ assertEquals(1, reader.getNumberOfStripes());
+
+ ArrowReader stripeReader = reader.nextStripeReader(1024);
+ VectorSchemaRoot schemaRoot = stripeReader.getVectorSchemaRoot();
+ stripeReader.loadNextBatch();
+
+ List<FieldVector> fields = schemaRoot.getFieldVectors();
+ assertEquals(2, fields.size());
+
+ IntVector intVector = (IntVector) fields.get(0);
+ VarCharVector varCharVector = (VarCharVector) fields.get(1);
+ for (int i = 0; i < 1024; ++i) {
+ assertEquals(i, intVector.get(i));
+ assertEquals("Last-" + (i * 3), new String(varCharVector.get(i), StandardCharsets.UTF_8));
+ }
+
+ assertFalse(stripeReader.loadNextBatch());
+ assertNull(reader.nextStripeReader(1024));
+
+ stripeReader.close();
+ reader.close();
+ }
+}
diff --git a/src/arrow/java/algorithm/pom.xml b/src/arrow/java/algorithm/pom.xml
new file mode 100644
index 000000000..fa4787d30
--- /dev/null
+++ b/src/arrow/java/algorithm/pom.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ </parent>
+ <artifactId>arrow-algorithm</artifactId>
+ <name>Arrow Algorithms</name>
+ <description>(Experimental/Contrib) A collection of algorithms for working with ValueVectors.</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-common</artifactId>
+ </dependency>
+ </dependencies>
+
+ <build>
+ </build>
+</project>
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
new file mode 100644
index 000000000..8811e43d3
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.deduplicate;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+
+/**
+ * Utilities for vector deduplication.
+ */
+class DeduplicationUtils {
+
+ /**
+ * Gets the start positions of the first distinct values in a vector.
+ * @param vector the target vector.
+ * @param runStarts the bit set to hold the start positions.
+ * @param <V> vector type.
+ */
+ public static <V extends ValueVector> void populateRunStartIndicators(V vector, ArrowBuf runStarts) {
+ int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount());
+ Preconditions.checkArgument(runStarts.capacity() >= bufSize);
+ runStarts.setZero(0, bufSize);
+
+ BitVectorHelper.setBit(runStarts, 0);
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, vector, null);
+ Range range = new Range(0, 0, 1);
+ for (int i = 1; i < vector.getValueCount(); i++) {
+ range.setLeftStart(i).setRightStart(i - 1);
+ if (!visitor.rangeEquals(range)) {
+ BitVectorHelper.setBit(runStarts, i);
+ }
+ }
+ }
+
+ /**
+ * Gets the run lengths, given the start positions.
+ * @param runStarts the bit set for start positions.
+ * @param runLengths the run length vector to populate.
+ * @param valueCount the number of values in the bit set.
+ */
+ public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths, int valueCount) {
+ int curStart = 0;
+ int lengthIndex = 0;
+ for (int i = 1; i < valueCount; i++) {
+ if (BitVectorHelper.get(runStarts, i) != 0) {
+ // we get a new distinct value
+ runLengths.setSafe(lengthIndex++, i - curStart);
+ curStart = i;
+ }
+ }
+
+ // process the last value
+ runLengths.setSafe(lengthIndex++, valueCount - curStart);
+ runLengths.setValueCount(lengthIndex);
+ }
+
+ /**
+ * Gets distinct values from the input vector by removing adjacent
+ * duplicated values.
+ * @param indicators the bit set containing the start positions of distinct values.
+ * @param inputVector the input vector.
+ * @param outputVector the output vector.
+ * @param <V> vector type.
+ */
+ public static <V extends ValueVector> void populateDeduplicatedValues(
+ ArrowBuf indicators, V inputVector, V outputVector) {
+ int dstIdx = 0;
+ for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) {
+ if (BitVectorHelper.get(indicators, srcIdx) != 0) {
+ outputVector.copyFromSafe(srcIdx, dstIdx++, inputVector);
+ }
+ }
+ outputVector.setValueCount(dstIdx);
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
new file mode 100644
index 000000000..5ef03cbe4
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.deduplicate;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+
+/**
+ * Remove adjacent equal elements from a vector.
+ * If the vector is sorted, it removes all duplicated values in the vector.
+ * @param <V> vector type.
+ */
+public class VectorRunDeduplicator<V extends ValueVector> implements AutoCloseable {
+
+ /**
+ * Bit set for distinct values.
+ * If the value at some index is not equal to the previous value,
+ * its bit is set to 1, otherwise its bit is set to 0.
+ */
+ private ArrowBuf distinctValueBuffer;
+
+ /**
+ * The vector to deduplicate.
+ */
+ private final V vector;
+
+ private final BufferAllocator allocator;
+
+ /**
+ * Constructs a vector run deduplicator for a given vector.
+ * @param vector the vector to deduplicate. Ownership is NOT taken.
+ * @param allocator the allocator used for allocating buffers for start indices.
+ */
+ public VectorRunDeduplicator(V vector, BufferAllocator allocator) {
+ this.vector = vector;
+ this.allocator = allocator;
+ }
+
+ private void createDistinctValueBuffer() {
+ Preconditions.checkArgument(distinctValueBuffer == null);
+ int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount());
+ distinctValueBuffer = allocator.buffer(bufSize);
+ DeduplicationUtils.populateRunStartIndicators(vector, distinctValueBuffer);
+ }
+
+ /**
+ * Gets the number of values which are different from their predecessor.
+ * @return the run count.
+ */
+ public int getRunCount() {
+ if (distinctValueBuffer == null) {
+ createDistinctValueBuffer();
+ }
+ return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
+ }
+
+ /**
+ * Gets the vector with deduplicated adjacent values removed.
+ * @param outVector the output vector.
+ */
+ public void populateDeduplicatedValues(V outVector) {
+ if (distinctValueBuffer == null) {
+ createDistinctValueBuffer();
+ }
+
+ DeduplicationUtils.populateDeduplicatedValues(distinctValueBuffer, vector, outVector);
+ }
+
+ /**
+ * Gets the length of each distinct value.
+ * @param lengthVector the vector for holding length values.
+ */
+ public void populateRunLengths(IntVector lengthVector) {
+ if (distinctValueBuffer == null) {
+ createDistinctValueBuffer();
+ }
+
+ DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount());
+ }
+
+ @Override
+ public void close() {
+ if (distinctValueBuffer != null) {
+ distinctValueBuffer.close();
+ distinctValueBuffer = null;
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
new file mode 100644
index 000000000..398368d1f
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * A dictionary builder is intended for the scenario frequently encountered in practice:
+ * the dictionary is not known a priori, so it is generated dynamically.
+ * In particular, when a new value arrives, it is tested to check if it is already
+ * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary.
+ * <p>
+ * The dictionary builder is intended to build a single dictionary.
+ * So it cannot be used for different dictionaries.
+ * </p>
+ * <p>Below gives the sample code for using the dictionary builder
+ * <pre>{@code
+ * DictionaryBuilder dictionaryBuilder = ...
+ * ...
+ * dictionaryBuild.addValue(newValue);
+ * ...
+ * }</pre>
+ * </p>
+ * <p>
+ * With the above code, the dictionary vector will be populated,
+ * and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method.
+ * After that, dictionary encoding can proceed with the populated dictionary..
+ * </p>
+ *
+ * @param <V> the dictionary vector type.
+ */
+public interface DictionaryBuilder<V extends ValueVector> {
+
+ /**
+ * Try to add all values from the target vector to the dictionary.
+ *
+ * @param targetVector the target vector containing values to probe.
+ * @return the number of values actually added to the dictionary.
+ */
+ int addValues(V targetVector);
+
+ /**
+ * Try to add an element from the target vector to the dictionary.
+ *
+ * @param targetVector the target vector containing new element.
+ * @param targetIndex the index of the new element in the target vector.
+ * @return the index of the new element in the dictionary.
+ */
+ int addValue(V targetVector, int targetIndex);
+
+ /**
+ * Gets the dictionary built.
+ *
+ * @return the dictionary.
+ */
+ V getDictionary();
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
new file mode 100644
index 000000000..cda7b3bf9
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * A dictionary encoder translates one vector into another one based on a dictionary vector.
+ * According to Arrow specification, the encoded vector must be an integer based vector, which
+ * is the index of the original vector element in the dictionary.
+ * @param <E> type of the encoded vector.
+ * @param <D> type of the vector to encode. It is also the type of the dictionary vector.
+ */
+public interface DictionaryEncoder<E extends BaseIntVector, D extends ValueVector> {
+
+ /**
+ * Translates an input vector into an output vector.
+ * @param input the input vector.
+ * @param output the output vector. Note that it must be in a fresh state. At least,
+ * all its validity bits should be clear.
+ */
+ void encode(D input, E output);
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableBasedDictionaryBuilder.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableBasedDictionaryBuilder.java
new file mode 100644
index 000000000..dd2b73498
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableBasedDictionaryBuilder.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import java.util.HashMap;
+
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.ElementAddressableVector;
+
+/**
+ * This class builds the dictionary based on a hash table.
+ * Each add operation can be finished in O(1) time,
+ * where n is the current dictionary size.
+ *
+ * @param <V> the dictionary vector type.
+ */
+public class HashTableBasedDictionaryBuilder<V extends ElementAddressableVector> implements DictionaryBuilder<V> {
+
+ /**
+ * The dictionary to be built.
+ */
+ private final V dictionary;
+
+ /**
+ * If null should be encoded.
+ */
+ private final boolean encodeNull;
+
+ /**
+ * The hash map for distinct dictionary entries.
+ * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+ */
+ private HashMap<ArrowBufPointer, Integer> hashMap = new HashMap<>();
+
+ /**
+ * The hasher used for calculating the hash code.
+ */
+ private final ArrowBufHasher hasher;
+
+ /**
+ * Next pointer to try to add to the hash table.
+ */
+ private ArrowBufPointer nextPointer;
+
+ /**
+ * Constructs a hash table based dictionary builder.
+ *
+ * @param dictionary the dictionary to populate.
+ */
+ public HashTableBasedDictionaryBuilder(V dictionary) {
+ this(dictionary, false);
+ }
+
+ /**
+ * Constructs a hash table based dictionary builder.
+ *
+ * @param dictionary the dictionary to populate.
+ * @param encodeNull if null values should be added to the dictionary.
+ */
+ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) {
+ this(dictionary, encodeNull, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Constructs a hash table based dictionary builder.
+ *
+ * @param dictionary the dictionary to populate.
+ * @param encodeNull if null values should be added to the dictionary.
+ * @param hasher the hasher used to compute the hash code.
+ */
+ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) {
+ this.dictionary = dictionary;
+ this.encodeNull = encodeNull;
+ this.hasher = hasher;
+ this.nextPointer = new ArrowBufPointer(hasher);
+ }
+
+ /**
+ * Gets the dictionary built.
+ *
+ * @return the dictionary.
+ */
+ @Override
+ public V getDictionary() {
+ return dictionary;
+ }
+
+ /**
+ * Try to add all values from the target vector to the dictionary.
+ *
+ * @param targetVector the target vector containing values to probe.
+ * @return the number of values actually added to the dictionary.
+ */
+ @Override
+ public int addValues(V targetVector) {
+ int oldDictSize = dictionary.getValueCount();
+ for (int i = 0; i < targetVector.getValueCount(); i++) {
+ if (!encodeNull && targetVector.isNull(i)) {
+ continue;
+ }
+ addValue(targetVector, i);
+ }
+
+ return dictionary.getValueCount() - oldDictSize;
+ }
+
+ /**
+ * Try to add an element from the target vector to the dictionary.
+ *
+ * @param targetVector the target vector containing new element.
+ * @param targetIndex the index of the new element in the target vector.
+ * @return the index of the new element in the dictionary.
+ */
+ @Override
+ public int addValue(V targetVector, int targetIndex) {
+ targetVector.getDataPointer(targetIndex, nextPointer);
+
+ Integer index = hashMap.get(nextPointer);
+ if (index == null) {
+ // a new dictionary element is found
+
+ // insert it to the dictionary
+ int dictSize = dictionary.getValueCount();
+ dictionary.copyFromSafe(targetIndex, dictSize, targetVector);
+ dictionary.setValueCount(dictSize + 1);
+ dictionary.getDataPointer(dictSize, nextPointer);
+
+ // insert it to the hash map
+ hashMap.put(nextPointer, dictSize);
+ nextPointer = new ArrowBufPointer(hasher);
+
+ return dictSize;
+ }
+ return index;
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
new file mode 100644
index 000000000..bea1a784c
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import java.util.HashMap;
+
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.ElementAddressableVector;
+
+/**
+ * Dictionary encoder based on hash table.
+ * @param <E> encoded vector type.
+ * @param <D> decoded vector type, which is also the dictionary type.
+ */
+public class HashTableDictionaryEncoder<E extends BaseIntVector, D extends ElementAddressableVector>
+ implements DictionaryEncoder<E, D> {
+
+ /**
+ * The dictionary for encoding/decoding.
+ * It must be sorted.
+ */
+ private final D dictionary;
+
+ /**
+ * The hasher used to compute the hash code.
+ */
+ private final ArrowBufHasher hasher;
+
+ /**
+ * A flag indicating if null should be encoded.
+ */
+ private final boolean encodeNull;
+
+ /**
+ * The hash map for distinct dictionary entries.
+ * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+ */
+ private HashMap<ArrowBufPointer, Integer> hashMap = new HashMap<>();
+
+ /**
+ * The pointer used to probe each element to encode.
+ */
+ private ArrowBufPointer reusablePointer;
+
+ /**
+ * Constructs a dictionary encoder.
+ * @param dictionary the dictionary.
+ *
+ */
+ public HashTableDictionaryEncoder(D dictionary) {
+ this(dictionary, false);
+ }
+
+ /**
+ * Constructs a dictionary encoder.
+ * @param dictionary the dictionary.
+ * @param encodeNull a flag indicating if null should be encoded.
+ * It determines the behaviors for processing null values in the input during encoding/decoding.
+ * <li>
+ * For encoding, when a null is encountered in the input,
+ * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary.
+ * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ * </li>
+ * <li>
+ * For decoding, when a null is encountered in the input,
+ * 1) If the flag is set to true, the decoder should never expect a null in the input.
+ * 2) If set to false, the decoder simply produces a null in the output.
+ * </li>
+ */
+ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) {
+ this(dictionary, encodeNull, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Constructs a dictionary encoder.
+ * @param dictionary the dictionary.
+ * @param encodeNull a flag indicating if null should be encoded.
+ * It determines the behaviors for processing null values in the input during encoding.
+ * When a null is encountered in the input,
+ * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary.
+ * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ * @param hasher the hasher used to calculate the hash code.
+ */
+ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) {
+ this.dictionary = dictionary;
+ this.hasher = hasher;
+ this.encodeNull = encodeNull;
+
+ reusablePointer = new ArrowBufPointer(hasher);
+
+ buildHashMap();
+ }
+
+ private void buildHashMap() {
+ for (int i = 0; i < dictionary.getValueCount(); i++) {
+ ArrowBufPointer pointer = new ArrowBufPointer(hasher);
+ dictionary.getDataPointer(i, pointer);
+ hashMap.put(pointer, i);
+ }
+ }
+
+ /**
+ * Encodes an input vector by a hash table.
+ * So the algorithm takes O(n) time, where n is the length of the input vector.
+ *
+ * @param input the input vector.
+ * @param output the output vector.
+ **/
+ @Override
+ public void encode(D input, E output) {
+ for (int i = 0; i < input.getValueCount(); i++) {
+ if (!encodeNull && input.isNull(i)) {
+ continue;
+ }
+
+ input.getDataPointer(i, reusablePointer);
+ Integer index = hashMap.get(reusablePointer);
+
+ if (index == null) {
+ throw new IllegalArgumentException("The data element is not found in the dictionary");
+ }
+ output.setWithPossibleTruncate(i, index);
+ }
+ output.setValueCount(input.getValueCount());
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
new file mode 100644
index 000000000..84a3a96af
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+
+/**
+ * Dictionary encoder based on linear search.
+ * @param <E> encoded vector type.
+ * @param <D> decoded vector type, which is also the dictionary type.
+ */
+public class LinearDictionaryEncoder<E extends BaseIntVector, D extends ValueVector>
+ implements DictionaryEncoder<E, D> {
+
+ /**
+ * The dictionary for encoding.
+ */
+ private final D dictionary;
+
+ /**
+ * A flag indicating if null should be encoded.
+ */
+ private final boolean encodeNull;
+
+ private RangeEqualsVisitor equalizer;
+
+ private Range range;
+
+ /**
+ * Constructs a dictionary encoder, with the encode null flag set to false.
+ * @param dictionary the dictionary. Its entries should be sorted in the non-increasing order of their frequency.
+ * Otherwise, the encoder still produces correct results, but at the expense of performance overhead.
+ */
+ public LinearDictionaryEncoder(D dictionary) {
+ this(dictionary, false);
+ }
+
+ /**
+ * Constructs a dictionary encoder.
+ * @param dictionary the dictionary. Its entries should be sorted in the non-increasing order of their frequency.
+ * Otherwise, the encoder still produces correct results, but at the expense of performance overhead.
+ * @param encodeNull a flag indicating if null should be encoded.
+ * It determines the behaviors for processing null values in the input during encoding.
+ * When a null is encountered in the input,
+ * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary.
+ * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ */
+ public LinearDictionaryEncoder(D dictionary, boolean encodeNull) {
+ this.dictionary = dictionary;
+ this.encodeNull = encodeNull;
+
+ // temporarily set left and right vectors to dictionary
+ equalizer = new RangeEqualsVisitor(dictionary, dictionary, null);
+ range = new Range(0, 0, 1);
+ }
+
+ /**
+ * Encodes an input vector by linear search.
+ * When the dictionary is sorted in the non-increasing order of the entry frequency,
+ * it will have constant time complexity, with no extra memory requirement.
+ * @param input the input vector.
+ * @param output the output vector. Note that it must be in a fresh state. At least,
+ * all its validity bits should be clear.
+ */
+ @Override
+ public void encode(D input, E output) {
+ for (int i = 0; i < input.getValueCount(); i++) {
+ if (!encodeNull && input.isNull(i)) {
+ // for this case, we should simply output a null in the output.
+ // by assuming the output vector is fresh, we do nothing here.
+ continue;
+ }
+
+ int index = linearSearch(input, i);
+ if (index == -1) {
+ throw new IllegalArgumentException("The data element is not found in the dictionary: " + i);
+ }
+ output.setWithPossibleTruncate(i, index);
+ }
+ output.setValueCount(input.getValueCount());
+ }
+
+ private int linearSearch(D input, int index) {
+ range.setLeftStart(index);
+ for (int i = 0; i < dictionary.getValueCount(); i++) {
+ range.setRightStart(i);
+ if (input.accept(equalizer, range)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java
new file mode 100644
index 000000000..1dbf65819
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import org.apache.arrow.algorithm.search.VectorSearcher;
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Dictionary encoder based on searching.
+ * @param <E> encoded vector type.
+ * @param <D> decoded vector type, which is also the dictionary type.
+ */
+public class SearchDictionaryEncoder<E extends BaseIntVector, D extends ValueVector>
+ implements DictionaryEncoder<E, D> {
+
+ /**
+ * The dictionary for encoding/decoding.
+ * It must be sorted.
+ */
+ private final D dictionary;
+
+ /**
+ * The criteria by which the dictionary is sorted.
+ */
+ private final VectorValueComparator<D> comparator;
+
+ /**
+ * A flag indicating if null should be encoded.
+ */
+ private final boolean encodeNull;
+
+ /**
+ * Constructs a dictionary encoder.
+ * @param dictionary the dictionary. It must be in sorted order.
+ * @param comparator the criteria for sorting.
+ */
+ public SearchDictionaryEncoder(D dictionary, VectorValueComparator<D> comparator) {
+ this(dictionary, comparator, false);
+ }
+
+ /**
+ * Constructs a dictionary encoder.
+ * @param dictionary the dictionary. It must be in sorted order.
+ * @param comparator the criteria for sorting.
+ * @param encodeNull a flag indicating if null should be encoded.
+ * It determines the behaviors for processing null values in the input during encoding.
+ * When a null is encountered in the input,
+ * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary.
+ * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ */
+ public SearchDictionaryEncoder(D dictionary, VectorValueComparator<D> comparator, boolean encodeNull) {
+ this.dictionary = dictionary;
+ this.comparator = comparator;
+ this.encodeNull = encodeNull;
+ }
+
+ /**
+ * Encodes an input vector by binary search.
+ * So the algorithm takes O(n * log(m)) time, where n is the length of the input vector,
+ * and m is the length of the dictionary.
+ * @param input the input vector.
+ * @param output the output vector. Note that it must be in a fresh state. At least,
+ * all its validity bits should be clear.
+ */
+ @Override
+ public void encode(D input, E output) {
+ for (int i = 0; i < input.getValueCount(); i++) {
+ if (!encodeNull && input.isNull(i)) {
+ // for this case, we should simply output a null in the output.
+ // by assuming the output vector is fresh, we do nothing here.
+ continue;
+ }
+
+ int index = VectorSearcher.binarySearch(dictionary, comparator, input, i);
+ if (index == -1) {
+ throw new IllegalArgumentException("The data element is not found in the dictionary: " + i);
+ }
+ output.setWithPossibleTruncate(i, index);
+ }
+ output.setValueCount(input.getValueCount());
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
new file mode 100644
index 000000000..f9cd77daa
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import java.util.TreeSet;
+
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * This class builds the dictionary based on a binary search tree.
+ * Each add operation can be finished in O(log(n)) time,
+ * where n is the current dictionary size.
+ *
+ * @param <V> the dictionary vector type.
+ */
+public class SearchTreeBasedDictionaryBuilder<V extends ValueVector> implements DictionaryBuilder<V> {
+
+ /**
+ * The dictionary to be built.
+ */
+ private final V dictionary;
+
+ /**
+ * The criteria for sorting in the search tree.
+ */
+ protected final VectorValueComparator<V> comparator;
+
+ /**
+ * If null should be encoded.
+ */
+ private final boolean encodeNull;
+
+ /**
+ * The search tree for storing the value index.
+ */
+ private TreeSet<Integer> searchTree;
+
+ /**
+ * Construct a search tree-based dictionary builder.
+ * @param dictionary the dictionary vector.
+ * @param comparator the criteria for value equality.
+ */
+ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator<V> comparator) {
+ this(dictionary, comparator, false);
+ }
+
+ /**
+ * Construct a search tree-based dictionary builder.
+ * @param dictionary the dictionary vector.
+ * @param comparator the criteria for value equality.
+ * @param encodeNull if null values should be added to the dictionary.
+ */
+ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator<V> comparator, boolean encodeNull) {
+ this.dictionary = dictionary;
+ this.comparator = comparator;
+ this.encodeNull = encodeNull;
+ this.comparator.attachVector(dictionary);
+
+ searchTree = new TreeSet<>((index1, index2) -> comparator.compare(index1, index2));
+ }
+
+ /**
+ * Gets the dictionary built.
+ * Please note that the dictionary is not in sorted order.
+ * Instead, its order is determined by the order of element insertion.
+ * To get the dictionary in sorted order, please use
+ * {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}.
+ * @return the dictionary.
+ */
+ @Override
+ public V getDictionary() {
+ return dictionary;
+ }
+
+ /**
+ * Try to add all values from the target vector to the dictionary.
+ * @param targetVector the target vector containing values to probe.
+ * @return the number of values actually added to the dictionary.
+ */
+ @Override
+ public int addValues(V targetVector) {
+ int oldDictSize = dictionary.getValueCount();
+ for (int i = 0; i < targetVector.getValueCount(); i++) {
+ if (!encodeNull && targetVector.isNull(i)) {
+ continue;
+ }
+ addValue(targetVector, i);
+ }
+ return dictionary.getValueCount() - oldDictSize;
+ }
+
+ /**
+ * Try to add an element from the target vector to the dictionary.
+ * @param targetVector the target vector containing new element.
+ * @param targetIndex the index of the new element in the target vector.
+ * @return the index of the new element in the dictionary.
+ */
+ @Override
+ public int addValue(V targetVector, int targetIndex) {
+ // first copy the value to the end of the dictionary
+ int dictSize = dictionary.getValueCount();
+ dictionary.copyFromSafe(targetIndex, dictSize, targetVector);
+
+ // try to add the value to the dictionary,
+ // if an equal element does not exist.
+ // this operation can be done in O(log(n)) time.
+ if (searchTree.add(dictSize)) {
+ // the element is successfully added
+ dictionary.setValueCount(dictSize + 1);
+ return dictSize;
+ } else {
+ // the element is already in the dictionary
+ // find its index in O(log(n)) time.
+ return searchTree.ceiling(dictSize);
+ }
+ }
+
+ /**
+ * Gets the sorted dictionary.
+ * Note that given the binary search tree, the sort can finish in O(n).
+ */
+ public void populateSortedDictionary(V sortedDictionary) {
+ int idx = 0;
+ for (Integer dictIdx : searchTree) {
+ sortedDictionary.copyFromSafe(dictIdx, idx++, dictionary);
+ }
+
+ sortedDictionary.setValueCount(dictionary.getValueCount());
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
new file mode 100644
index 000000000..f5e95cf10
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.misc;
+
+import org.apache.arrow.vector.BaseIntVector;
+
+/**
+ * Partial sum related utilities.
+ */
+public class PartialSumUtils {
+
+ /**
+ * Converts an input vector to a partial sum vector.
+ * This is an inverse operation of {@link PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}.
+ * Suppose we have input vector a and output vector b.
+ * Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...).
+ * @param deltaVector the input vector.
+ * @param partialSumVector the output vector.
+ * @param sumBase the base of the partial sums.
+ */
+ public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) {
+ long sum = sumBase;
+ partialSumVector.setWithPossibleTruncate(0, sumBase);
+
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ sum += deltaVector.getValueAsLong(i);
+ partialSumVector.setWithPossibleTruncate(i + 1, sum);
+ }
+ partialSumVector.setValueCount(deltaVector.getValueCount() + 1);
+ }
+
+ /**
+ * Converts an input vector to the delta vector.
+ * This is an inverse operation of {@link PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}.
+ * Suppose we have input vector a and output vector b.
+ * Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...).
+ * @param partialSumVector the input vector.
+ * @param deltaVector the output vector.
+ */
+ public static void toDeltaVector(BaseIntVector partialSumVector, BaseIntVector deltaVector) {
+ for (int i = 0; i < partialSumVector.getValueCount() - 1; i++) {
+ long delta = partialSumVector.getValueAsLong(i + 1) - partialSumVector.getValueAsLong(i);
+ deltaVector.setWithPossibleTruncate(i, delta);
+ }
+ deltaVector.setValueCount(partialSumVector.getValueCount() - 1);
+ }
+
+ /**
+ * Given a value and a partial sum vector, finds its position in the partial sum vector.
+ * In particular, given an integer value a and partial sum vector v, we try to find a
+ * position i, so that v(i) <= a < v(i + 1).
+ * The algorithm is based on binary search, so it takes O(log(n)) time, where n is
+ * the length of the partial sum vector.
+ * @param partialSumVector the input partial sum vector.
+ * @param value the value to search.
+ * @return the position in the partial sum vector, if any, or -1, if none is found.
+ */
+ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, long value) {
+ if (value < partialSumVector.getValueAsLong(0) ||
+ value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) {
+ return -1;
+ }
+
+ int low = 0;
+ int high = partialSumVector.getValueCount() - 1;
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ long midValue = partialSumVector.getValueAsLong(mid);
+
+ if (midValue <= value) {
+ if (mid == partialSumVector.getValueCount() - 1) {
+ // the mid is the last element, we have found it
+ return mid;
+ }
+ long nextMidValue = partialSumVector.getValueAsLong(mid + 1);
+ if (value < nextMidValue) {
+ // midValue <= value < nextMidValue
+ // this is exactly what we want.
+ return mid;
+ } else {
+ // value >= nextMidValue
+ // continue to search from the next value on the right
+ low = mid + 1;
+ }
+ } else {
+ // midValue > value
+ long prevMidValue = partialSumVector.getValueAsLong(mid - 1);
+ if (prevMidValue <= value) {
+ // prevMidValue <= value < midValue
+ // this is exactly what we want
+ return mid - 1;
+ } else {
+ // prevMidValue > value
+ // continue to search from the previous value on the left
+ high = mid - 1;
+ }
+ }
+ }
+ throw new IllegalStateException("Should never get here");
+ }
+
+ private PartialSumUtils() {
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
new file mode 100644
index 000000000..43c9a5b01
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.rank;
+
+import java.util.stream.IntStream;
+
+import org.apache.arrow.algorithm.sort.IndexSorter;
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Utility for calculating ranks of vector elements.
+ * @param <V> the vector type
+ */
+public class VectorRank<V extends ValueVector> {
+
+ private VectorValueComparator<V> comparator;
+
+ /**
+ * Vector indices.
+ */
+ private IntVector indices;
+
+ private final BufferAllocator allocator;
+
+ /**
+ * Constructs a vector rank utility.
+ * @param allocator the allocator to use.
+ */
+ public VectorRank(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ /**
+ * Given a rank r, gets the index of the element that is the rth smallest in the vector.
+ * The operation is performed without changing the vector, and takes O(n) time,
+ * where n is the length of the vector.
+ * @param vector the vector from which to get the element index.
+ * @param comparator the criteria for vector element comparison.
+ * @param rank the rank to determine.
+ * @return the element index with the given rank.
+ */
+ public int indexAtRank(V vector, VectorValueComparator<V> comparator, int rank) {
+ Preconditions.checkArgument(rank >= 0 && rank < vector.getValueCount());
+ try {
+ indices = new IntVector("index vector", allocator);
+ indices.allocateNew(vector.getValueCount());
+ IntStream.range(0, vector.getValueCount()).forEach(i -> indices.set(i, i));
+
+ comparator.attachVector(vector);
+ this.comparator = comparator;
+
+ int pos = getRank(0, vector.getValueCount() - 1, rank);
+ return indices.get(pos);
+ } finally {
+ indices.close();
+ }
+ }
+
+ private int getRank(int low, int high, int rank) {
+ int mid = IndexSorter.partition(low, high, indices, comparator);
+ if (mid < rank) {
+ return getRank(mid + 1, high, rank);
+ } else if (mid > rank) {
+ return getRank(low, mid - 1, rank);
+ } else {
+ // mid == rank
+ return mid;
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
new file mode 100644
index 000000000..e93eb2c3d
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.search;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+
+/**
+ * Search for a value in the vector by multiple threads.
+ * This is often used in scenarios where the vector is large or
+ * low response time is required.
+ * @param <V> the vector type.
+ */
+public class ParallelSearcher<V extends ValueVector> {
+
+ /**
+ * The target vector to search.
+ */
+ private final V vector;
+
+ /**
+ * The thread pool.
+ */
+ private final ExecutorService threadPool;
+
+ /**
+ * The number of threads to use.
+ */
+ private final int numThreads;
+
+ /**
+ * The position of the key in the target vector, if any.
+ */
+ private int keyPosition = -1;
+
+ /**
+ * Constructs a parallel searcher.
+ * @param vector the vector to search.
+ * @param threadPool the thread pool to use.
+ * @param numThreads the number of threads to use.
+ */
+ public ParallelSearcher(V vector, ExecutorService threadPool, int numThreads) {
+ this.vector = vector;
+ this.threadPool = threadPool;
+ this.numThreads = numThreads;
+ }
+
+ private CompletableFuture<Boolean>[] initSearch() {
+ keyPosition = -1;
+ final CompletableFuture<Boolean>[] futures = new CompletableFuture[numThreads];
+ for (int i = 0; i < futures.length; i++) {
+ futures[i] = new CompletableFuture<>();
+ }
+ return futures;
+ }
+
+ /**
+ * Search for the key in the target vector. The element-wise comparison is based on
+ * {@link RangeEqualsVisitor}, so there are two possible results for each element-wise
+ * comparison: equal and un-equal.
+ * @param keyVector the vector containing the search key.
+ * @param keyIndex the index of the search key in the key vector.
+ * @return the position of a matched value in the target vector,
+ * or -1 if none is found. Please note that if there are multiple
+ * matches of the key in the target vector, this method makes no
+ * guarantees about which instance is returned.
+ * For an alternative search implementation that always finds the first match of the key,
+ * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
+ * @throws ExecutionException if an exception occurs in a thread.
+ * @throws InterruptedException if a thread is interrupted.
+ */
+ public int search(V keyVector, int keyIndex) throws ExecutionException, InterruptedException {
+ final CompletableFuture<Boolean>[] futures = initSearch();
+ final int valueCount = vector.getValueCount();
+ for (int i = 0; i < numThreads; i++) {
+ final int tid = i;
+ threadPool.submit(() -> {
+ // convert to long to avoid overflow
+ int start = (int) (((long) valueCount) * tid / numThreads);
+ int end = (int) ((long) valueCount) * (tid + 1) / numThreads;
+
+ if (start >= end) {
+ // no data assigned to this task.
+ futures[tid].complete(false);
+ return;
+ }
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null);
+ Range range = new Range(0, 0, 1);
+ for (int pos = start; pos < end; pos++) {
+ if (keyPosition != -1) {
+ // the key has been found by another task
+ futures[tid].complete(false);
+ return;
+ }
+ range.setLeftStart(pos).setRightStart(keyIndex);
+ if (visitor.rangeEquals(range)) {
+ keyPosition = pos;
+ futures[tid].complete(true);
+ return;
+ }
+ }
+
+ // no match value is found.
+ futures[tid].complete(false);
+ });
+ }
+
+ CompletableFuture.allOf(futures).get();
+ return keyPosition;
+ }
+
+ /**
+ * Search for the key in the target vector. The element-wise comparison is based on
+ * {@link VectorValueComparator}, so there are three possible results for each element-wise
+ * comparison: less than, equal to and greater than.
+ * @param keyVector the vector containing the search key.
+ * @param keyIndex the index of the search key in the key vector.
+ * @param comparator the comparator for comparing the key against vector elements.
+ * @return the position of a matched value in the target vector,
+ * or -1 if none is found. Please note that if there are multiple
+ * matches of the key in the target vector, this method makes no
+ * guarantees about which instance is returned.
+ * For an alternative search implementation that always finds the first match of the key,
+ * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
+ * @throws ExecutionException if an exception occurs in a thread.
+ * @throws InterruptedException if a thread is interrupted.
+ */
+ public int search(
+ V keyVector, int keyIndex, VectorValueComparator<V> comparator) throws ExecutionException, InterruptedException {
+ final CompletableFuture<Boolean>[] futures = initSearch();
+ final int valueCount = vector.getValueCount();
+ for (int i = 0; i < numThreads; i++) {
+ final int tid = i;
+ threadPool.submit(() -> {
+ // convert to long to avoid overflow
+ int start = (int) (((long) valueCount) * tid / numThreads);
+ int end = (int) ((long) valueCount) * (tid + 1) / numThreads;
+
+ if (start >= end) {
+ // no data assigned to this task.
+ futures[tid].complete(false);
+ return;
+ }
+
+ VectorValueComparator<V> localComparator = comparator.createNew();
+ localComparator.attachVectors(vector, keyVector);
+ for (int pos = start; pos < end; pos++) {
+ if (keyPosition != -1) {
+ // the key has been found by another task
+ futures[tid].complete(false);
+ return;
+ }
+ if (localComparator.compare(pos, keyIndex) == 0) {
+ keyPosition = pos;
+ futures[tid].complete(true);
+ return;
+ }
+ }
+
+ // no match value is found.
+ futures[tid].complete(false);
+ });
+ }
+
+ CompletableFuture.allOf(futures).get();
+ return keyPosition;
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
new file mode 100644
index 000000000..249194843
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.search;
+
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Search for the range of a particular element in the target vector.
+ */
+public class VectorRangeSearcher {
+
+ /**
+ * Result returned when a search fails.
+ */
+ public static final int SEARCH_FAIL_RESULT = -1;
+
+ /**
+ * Search for the first occurrence of an element.
+ * The search is based on the binary search algorithm. So the target vector must be sorted.
+ * @param targetVector the vector from which to perform the search.
+ * @param comparator the criterion for the comparison.
+ * @param keyVector the vector containing the element to search.
+ * @param keyIndex the index of the search key in the key vector.
+ * @param <V> the vector type.
+ * @return the index of the first matched element if any, and -1 otherwise.
+ */
+ public static <V extends ValueVector> int getFirstMatch(
+ V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
+ comparator.attachVectors(keyVector, targetVector);
+
+ int ret = SEARCH_FAIL_RESULT;
+
+ int low = 0;
+ int high = targetVector.getValueCount() - 1;
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int result = comparator.compare(keyIndex, mid);
+ if (result < 0) {
+ // the key is smaller
+ high = mid - 1;
+ } else if (result > 0) {
+ // the key is larger
+ low = mid + 1;
+ } else {
+ // an equal element is found
+ // continue to go left-ward
+ ret = mid;
+ high = mid - 1;
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Search for the last occurrence of an element.
+ * The search is based on the binary search algorithm. So the target vector must be sorted.
+ * @param targetVector the vector from which to perform the search.
+ * @param comparator the criterion for the comparison.
+ * @param keyVector the vector containing the element to search.
+ * @param keyIndex the index of the search key in the key vector.
+ * @param <V> the vector type.
+ * @return the index of the last matched element if any, and -1 otherwise.
+ */
+ public static <V extends ValueVector> int getLastMatch(
+ V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
+ comparator.attachVectors(keyVector, targetVector);
+
+ int ret = SEARCH_FAIL_RESULT;
+
+ int low = 0;
+ int high = targetVector.getValueCount() - 1;
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int result = comparator.compare(keyIndex, mid);
+ if (result < 0) {
+ // the key is smaller
+ high = mid - 1;
+ } else if (result > 0) {
+ // the key is larger
+ low = mid + 1;
+ } else {
+ // an equal element is found,
+ // continue to go right-ward
+ ret = mid;
+ low = mid + 1;
+ }
+ }
+ return ret;
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
new file mode 100644
index 000000000..646bca01b
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.search;
+
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Search for a particular element in the vector.
+ */
+public final class VectorSearcher {
+
+ /**
+ * Result returned when a search fails.
+ */
+ public static final int SEARCH_FAIL_RESULT = -1;
+
+ /**
+ * Search for a particular element from the key vector in the target vector by binary search.
+ * The target vector must be sorted.
+ * @param targetVector the vector from which to perform the sort.
+ * @param comparator the criterion for the sort.
+ * @param keyVector the vector containing the element to search.
+ * @param keyIndex the index of the search key in the key vector.
+ * @param <V> the vector type.
+ * @return the index of a matched element if any, and -1 otherwise.
+ */
+ public static <V extends ValueVector> int binarySearch(
+ V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
+ comparator.attachVectors(keyVector, targetVector);
+
+ // perform binary search
+ int low = 0;
+ int high = targetVector.getValueCount() - 1;
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int cmp = comparator.compare(keyIndex, mid);
+ if (cmp < 0) {
+ high = mid - 1;
+ } else if (cmp > 0) {
+ low = mid + 1;
+ } else {
+ return mid;
+ }
+ }
+ return SEARCH_FAIL_RESULT;
+ }
+
+ /**
+ * Search for a particular element from the key vector in the target vector by traversing the vector in sequence.
+ * @param targetVector the vector from which to perform the search.
+ * @param comparator the criterion for element equality.
+ * @param keyVector the vector containing the element to search.
+ * @param keyIndex the index of the search key in the key vector.
+ * @param <V> the vector type.
+ * @return the index of a matched element if any, and -1 otherwise.
+ */
+ public static <V extends ValueVector> int linearSearch(
+ V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
+ comparator.attachVectors(keyVector, targetVector);
+ for (int i = 0; i < targetVector.getValueCount(); i++) {
+ if (comparator.compare(keyIndex, i) == 0) {
+ return i;
+ }
+ }
+ return SEARCH_FAIL_RESULT;
+ }
+
+ private VectorSearcher() {
+
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
new file mode 100644
index 000000000..ec74598e0
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * A composite vector comparator compares a number of vectors
+ * by a number of inner comparators.
+ * <p>
+ * It works by first using the first comparator, if a non-zero value
+ * is returned, it simply returns it. Otherwise, it uses the second comparator,
+ * and so on, until a non-zero value is produced, or all inner comparators have
+ * been used.
+ * </p>
+ */
+public class CompositeVectorComparator extends VectorValueComparator<ValueVector> {
+
+ private final VectorValueComparator[] innerComparators;
+
+ public CompositeVectorComparator(VectorValueComparator[] innerComparators) {
+ this.innerComparators = innerComparators;
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ // short-cut for scenarios when the caller can be sure that the vectors are non-nullable.
+ for (int i = 0; i < innerComparators.length; i++) {
+ int result = innerComparators[i].compareNotNull(index1, index2);
+ if (result != 0) {
+ return result;
+ }
+ }
+ return 0;
+ }
+
+ @Override
+ public int compare(int index1, int index2) {
+ for (int i = 0; i < innerComparators.length; i++) {
+ int result = innerComparators[i].compare(index1, index2);
+ if (result != 0) {
+ return result;
+ }
+ }
+ return 0;
+ }
+
+ @Override
+ public VectorValueComparator<ValueVector> createNew() {
+ VectorValueComparator[] newInnerComparators = new VectorValueComparator[innerComparators.length];
+ for (int i = 0; i < innerComparators.length; i++) {
+ newInnerComparators[i] = innerComparators[i].createNew();
+ }
+ return new CompositeVectorComparator(newInnerComparators);
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
new file mode 100644
index 000000000..c41821917
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -0,0 +1,431 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH;
+
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+
+/**
+ * Default comparator implementations for different types of vectors.
+ */
+public class DefaultVectorComparators {
+
+ /**
+ * Create the default comparator for the vector.
+ * @param vector the vector.
+ * @param <T> the vector type.
+ * @return the default comparator.
+ */
+ public static <T extends ValueVector> VectorValueComparator<T> createDefaultComparator(T vector) {
+ if (vector instanceof BaseFixedWidthVector) {
+ if (vector instanceof TinyIntVector) {
+ return (VectorValueComparator<T>) new ByteComparator();
+ } else if (vector instanceof SmallIntVector) {
+ return (VectorValueComparator<T>) new ShortComparator();
+ } else if (vector instanceof IntVector) {
+ return (VectorValueComparator<T>) new IntComparator();
+ } else if (vector instanceof BigIntVector) {
+ return (VectorValueComparator<T>) new LongComparator();
+ } else if (vector instanceof Float4Vector) {
+ return (VectorValueComparator<T>) new Float4Comparator();
+ } else if (vector instanceof Float8Vector) {
+ return (VectorValueComparator<T>) new Float8Comparator();
+ } else if (vector instanceof UInt1Vector) {
+ return (VectorValueComparator<T>) new UInt1Comparator();
+ } else if (vector instanceof UInt2Vector) {
+ return (VectorValueComparator<T>) new UInt2Comparator();
+ } else if (vector instanceof UInt4Vector) {
+ return (VectorValueComparator<T>) new UInt4Comparator();
+ } else if (vector instanceof UInt8Vector) {
+ return (VectorValueComparator<T>) new UInt8Comparator();
+ }
+ } else if (vector instanceof BaseVariableWidthVector) {
+ return (VectorValueComparator<T>) new VariableWidthComparator();
+ } else if (vector instanceof BaseRepeatedValueVector) {
+ VectorValueComparator<?> innerComparator =
+ createDefaultComparator(((BaseRepeatedValueVector) vector).getDataVector());
+ return new RepeatedValueComparator(innerComparator);
+ }
+
+ throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
+ }
+
+ /**
+ * Default comparator for bytes.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class ByteComparator extends VectorValueComparator<TinyIntVector> {
+
+ public ByteComparator() {
+ super(Byte.SIZE / 8);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ byte value1 = vector1.get(index1);
+ byte value2 = vector2.get(index2);
+ return value1 - value2;
+ }
+
+ @Override
+ public VectorValueComparator<TinyIntVector> createNew() {
+ return new ByteComparator();
+ }
+ }
+
+ /**
+ * Default comparator for short integers.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class ShortComparator extends VectorValueComparator<SmallIntVector> {
+
+ public ShortComparator() {
+ super(Short.SIZE / 8);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ short value1 = vector1.get(index1);
+ short value2 = vector2.get(index2);
+ return value1 - value2;
+ }
+
+ @Override
+ public VectorValueComparator<SmallIntVector> createNew() {
+ return new ShortComparator();
+ }
+ }
+
+ /**
+ * Default comparator for 32-bit integers.
+ * The comparison is based on int values, with null comes first.
+ */
+ public static class IntComparator extends VectorValueComparator<IntVector> {
+
+ public IntComparator() {
+ super(Integer.SIZE / 8);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ int value1 = vector1.get(index1);
+ int value2 = vector2.get(index2);
+ return Integer.compare(value1, value2);
+ }
+
+ @Override
+ public VectorValueComparator<IntVector> createNew() {
+ return new IntComparator();
+ }
+ }
+
+ /**
+ * Default comparator for long integers.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class LongComparator extends VectorValueComparator<BigIntVector> {
+
+ public LongComparator() {
+ super(Long.SIZE / 8);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ long value1 = vector1.get(index1);
+ long value2 = vector2.get(index2);
+
+ return Long.compare(value1, value2);
+ }
+
+ @Override
+ public VectorValueComparator<BigIntVector> createNew() {
+ return new LongComparator();
+ }
+ }
+
+ /**
+ * Default comparator for unsigned bytes.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class UInt1Comparator extends VectorValueComparator<UInt1Vector> {
+
+ public UInt1Comparator() {
+ super(1);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ byte value1 = vector1.get(index1);
+ byte value2 = vector2.get(index2);
+
+ return (value1 & 0xff) - (value2 & 0xff);
+ }
+
+ @Override
+ public VectorValueComparator<UInt1Vector> createNew() {
+ return new UInt1Comparator();
+ }
+ }
+
+ /**
+ * Default comparator for unsigned short integer.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class UInt2Comparator extends VectorValueComparator<UInt2Vector> {
+
+ public UInt2Comparator() {
+ super(2);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ char value1 = vector1.get(index1);
+ char value2 = vector2.get(index2);
+
+ // please note that we should not use the built-in
+ // Character#compare method here, as that method
+ // essentially compares char values as signed integers.
+ return (value1 & 0xffff) - (value2 & 0xffff);
+ }
+
+ @Override
+ public VectorValueComparator<UInt2Vector> createNew() {
+ return new UInt2Comparator();
+ }
+ }
+
+ /**
+ * Default comparator for unsigned integer.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class UInt4Comparator extends VectorValueComparator<UInt4Vector> {
+
+ public UInt4Comparator() {
+ super(4);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ int value1 = vector1.get(index1);
+ int value2 = vector2.get(index2);
+ return ByteFunctionHelpers.unsignedIntCompare(value1, value2);
+ }
+
+ @Override
+ public VectorValueComparator<UInt4Vector> createNew() {
+ return new UInt4Comparator();
+ }
+ }
+
+ /**
+ * Default comparator for unsigned long integer.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class UInt8Comparator extends VectorValueComparator<UInt8Vector> {
+
+ public UInt8Comparator() {
+ super(8);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ long value1 = vector1.get(index1);
+ long value2 = vector2.get(index2);
+ return ByteFunctionHelpers.unsignedLongCompare(value1, value2);
+ }
+
+ @Override
+ public VectorValueComparator<UInt8Vector> createNew() {
+ return new UInt8Comparator();
+ }
+ }
+
+ /**
+ * Default comparator for float type.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class Float4Comparator extends VectorValueComparator<Float4Vector> {
+
+ public Float4Comparator() {
+ super(Float.SIZE / 8);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ float value1 = vector1.get(index1);
+ float value2 = vector2.get(index2);
+
+ boolean isNan1 = Float.isNaN(value1);
+ boolean isNan2 = Float.isNaN(value2);
+ if (isNan1 || isNan2) {
+ if (isNan1 && isNan2) {
+ return 0;
+ } else if (isNan1) {
+ // nan is greater than any normal value
+ return 1;
+ } else {
+ return -1;
+ }
+ }
+
+ return (int) Math.signum(value1 - value2);
+ }
+
+ @Override
+ public VectorValueComparator<Float4Vector> createNew() {
+ return new Float4Comparator();
+ }
+ }
+
+ /**
+ * Default comparator for double type.
+ * The comparison is based on values, with null comes first.
+ */
+ public static class Float8Comparator extends VectorValueComparator<Float8Vector> {
+
+ public Float8Comparator() {
+ super(Double.SIZE / 8);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ double value1 = vector1.get(index1);
+ double value2 = vector2.get(index2);
+
+ boolean isNan1 = Double.isNaN(value1);
+ boolean isNan2 = Double.isNaN(value2);
+ if (isNan1 || isNan2) {
+ if (isNan1 && isNan2) {
+ return 0;
+ } else if (isNan1) {
+ // nan is greater than any normal value
+ return 1;
+ } else {
+ return -1;
+ }
+ }
+
+ return (int) Math.signum(value1 - value2);
+ }
+
+ @Override
+ public VectorValueComparator<Float8Vector> createNew() {
+ return new Float8Comparator();
+ }
+ }
+
+ /**
+ * Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}.
+ * The comparison is in lexicographic order, with null comes first.
+ */
+ public static class VariableWidthComparator extends VectorValueComparator<BaseVariableWidthVector> {
+
+ private ArrowBufPointer reusablePointer1 = new ArrowBufPointer();
+
+ private ArrowBufPointer reusablePointer2 = new ArrowBufPointer();
+
+ @Override
+ public int compare(int index1, int index2) {
+ vector1.getDataPointer(index1, reusablePointer1);
+ vector2.getDataPointer(index2, reusablePointer2);
+ return reusablePointer1.compareTo(reusablePointer2);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ vector1.getDataPointer(index1, reusablePointer1);
+ vector2.getDataPointer(index2, reusablePointer2);
+ return reusablePointer1.compareTo(reusablePointer2);
+ }
+
+ @Override
+ public VectorValueComparator<BaseVariableWidthVector> createNew() {
+ return new VariableWidthComparator();
+ }
+ }
+
+ /**
+ * Default comparator for {@link BaseRepeatedValueVector}.
+ * It works by comparing the underlying vector in a lexicographic order.
+ * @param <T> inner vector type.
+ */
+ public static class RepeatedValueComparator<T extends ValueVector>
+ extends VectorValueComparator<BaseRepeatedValueVector> {
+
+ private VectorValueComparator<T> innerComparator;
+
+ public RepeatedValueComparator(VectorValueComparator<T> innerComparator) {
+ this.innerComparator = innerComparator;
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ int startIdx1 = vector1.getOffsetBuffer().getInt(index1 * OFFSET_WIDTH);
+ int startIdx2 = vector2.getOffsetBuffer().getInt(index2 * OFFSET_WIDTH);
+
+ int endIdx1 = vector1.getOffsetBuffer().getInt((index1 + 1) * OFFSET_WIDTH);
+ int endIdx2 = vector2.getOffsetBuffer().getInt((index2 + 1) * OFFSET_WIDTH);
+
+ int length1 = endIdx1 - startIdx1;
+ int length2 = endIdx2 - startIdx2;
+
+ int length = length1 < length2 ? length1 : length2;
+
+ for (int i = 0; i < length; i++) {
+ int result = innerComparator.compare(startIdx1 + i, startIdx2 + i);
+ if (result != 0) {
+ return result;
+ }
+ }
+ return length1 - length2;
+ }
+
+ @Override
+ public VectorValueComparator<BaseRepeatedValueVector> createNew() {
+ VectorValueComparator<T> newInnerComparator = innerComparator.createNew();
+ return new RepeatedValueComparator(newInnerComparator);
+ }
+
+ @Override
+ public void attachVectors(BaseRepeatedValueVector vector1, BaseRepeatedValueVector vector2) {
+ this.vector1 = vector1;
+ this.vector2 = vector2;
+
+ innerComparator.attachVectors((T) vector1.getDataVector(), (T) vector2.getDataVector());
+ }
+ }
+
+ private DefaultVectorComparators() {
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
new file mode 100644
index 000000000..aaa7ba117
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+
+/**
+ * Default in-place sorter for fixed-width vectors.
+ * It is based on quick-sort, with average time complexity O(n*log(n)).
+ * @param <V> vector type.
+ */
+public class FixedWidthInPlaceVectorSorter<V extends BaseFixedWidthVector> implements InPlaceVectorSorter<V> {
+
+ /**
+ * If the number of items is smaller than this threshold, we will use another algorithm to sort the data.
+ */
+ public static final int CHANGE_ALGORITHM_THRESHOLD = 15;
+
+ static final int STOP_CHOOSING_PIVOT_THRESHOLD = 3;
+
+ VectorValueComparator<V> comparator;
+
+ /**
+ * The vector to sort.
+ */
+ V vec;
+
+ /**
+ * The buffer to hold the pivot.
+ * It always has length 1.
+ */
+ V pivotBuffer;
+
+ @Override
+ public void sortInPlace(V vec, VectorValueComparator<V> comparator) {
+ try {
+ this.vec = vec;
+ this.comparator = comparator;
+ this.pivotBuffer = (V) vec.getField().createVector(vec.getAllocator());
+ this.pivotBuffer.allocateNew(1);
+ this.pivotBuffer.setValueCount(1);
+
+ comparator.attachVectors(vec, pivotBuffer);
+ quickSort();
+ } finally {
+ this.pivotBuffer.close();
+ }
+ }
+
+ private void quickSort() {
+ try (OffHeapIntStack rangeStack = new OffHeapIntStack(vec.getAllocator())) {
+ rangeStack.push(0);
+ rangeStack.push(vec.getValueCount() - 1);
+
+ while (!rangeStack.isEmpty()) {
+ int high = rangeStack.pop();
+ int low = rangeStack.pop();
+ if (low < high) {
+ if (high - low < CHANGE_ALGORITHM_THRESHOLD) {
+ // switch to insertion sort
+ InsertionSorter.insertionSort(vec, low, high, comparator, pivotBuffer);
+ continue;
+ }
+
+ int mid = partition(low, high);
+
+ // push the larger part to stack first,
+ // to reduce the required stack size
+ if (high - mid < mid - low) {
+ rangeStack.push(low);
+ rangeStack.push(mid - 1);
+
+ rangeStack.push(mid + 1);
+ rangeStack.push(high);
+ } else {
+ rangeStack.push(mid + 1);
+ rangeStack.push(high);
+
+ rangeStack.push(low);
+ rangeStack.push(mid - 1);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Select the pivot as the median of 3 samples.
+ */
+ void choosePivot(int low, int high) {
+ // we need at least 3 items
+ if (high - low + 1 < STOP_CHOOSING_PIVOT_THRESHOLD) {
+ pivotBuffer.copyFrom(low, 0, vec);
+ return;
+ }
+
+ comparator.attachVector(vec);
+ int mid = low + (high - low) / 2;
+
+ // find the median by at most 3 comparisons
+ int medianIdx;
+ if (comparator.compare(low, mid) < 0) {
+ if (comparator.compare(mid, high) < 0) {
+ medianIdx = mid;
+ } else {
+ if (comparator.compare(low, high) < 0) {
+ medianIdx = high;
+ } else {
+ medianIdx = low;
+ }
+ }
+ } else {
+ if (comparator.compare(mid, high) > 0) {
+ medianIdx = mid;
+ } else {
+ if (comparator.compare(low, high) < 0) {
+ medianIdx = low;
+ } else {
+ medianIdx = high;
+ }
+ }
+ }
+
+ // move the pivot to the low position, if necessary
+ if (medianIdx != low) {
+ pivotBuffer.copyFrom(medianIdx, 0, vec);
+ vec.copyFrom(low, medianIdx, vec);
+ vec.copyFrom(0, low, pivotBuffer);
+ } else {
+ pivotBuffer.copyFrom(low, 0, vec);
+ }
+
+ comparator.attachVectors(vec, pivotBuffer);
+ }
+
+ private int partition(int low, int high) {
+ choosePivot(low, high);
+
+ while (low < high) {
+ while (low < high && comparator.compare(high, 0) >= 0) {
+ high -= 1;
+ }
+ vec.copyFrom(high, low, vec);
+
+ while (low < high && comparator.compare(low, 0) <= 0) {
+ low += 1;
+ }
+ vec.copyFrom(low, high, vec);
+ }
+
+ vec.copyFrom(0, low, pivotBuffer);
+ return low;
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
new file mode 100644
index 000000000..4f6c76657
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.IntVector;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Default out-of-place sorter for fixed-width vectors.
+ * It is an out-of-place sort, with time complexity O(n*log(n)).
+ * @param <V> vector type.
+ */
+public class FixedWidthOutOfPlaceVectorSorter<V extends BaseFixedWidthVector> implements OutOfPlaceVectorSorter<V> {
+
+ protected IndexSorter<V> indexSorter = new IndexSorter<>();
+
+ @Override
+ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator<V> comparator) {
+ comparator.attachVector(srcVector);
+
+ int valueWidth = comparator.getValueWidth();
+
+ // buffers referenced in the sort
+ ArrowBuf srcValueBuffer = srcVector.getDataBuffer();
+ ArrowBuf dstValidityBuffer = dstVector.getValidityBuffer();
+ ArrowBuf dstValueBuffer = dstVector.getDataBuffer();
+
+ // check buffer size
+ Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
+ "Not enough capacity for the validity buffer of the dst vector. " +
+ "Expected capacity %s, actual capacity %s",
+ (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity());
+ Preconditions.checkArgument(
+ dstValueBuffer.capacity() >= srcVector.getValueCount() * srcVector.getTypeWidth(),
+ "Not enough capacity for the data buffer of the dst vector. " +
+ "Expected capacity %s, actual capacity %s",
+ srcVector.getValueCount() * srcVector.getTypeWidth(), dstValueBuffer.capacity());
+
+ // sort value indices
+ try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
+ sortedIndices.allocateNew(srcVector.getValueCount());
+ sortedIndices.setValueCount(srcVector.getValueCount());
+ indexSorter.sort(srcVector, sortedIndices, comparator);
+
+ // copy sorted values to the output vector
+ for (int dstIndex = 0; dstIndex < sortedIndices.getValueCount(); dstIndex++) {
+ int srcIndex = sortedIndices.get(dstIndex);
+ if (srcVector.isNull(srcIndex)) {
+ BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex);
+ } else {
+ BitVectorHelper.setBit(dstValidityBuffer, dstIndex);
+ PlatformDependent.copyMemory(
+ srcValueBuffer.memoryAddress() + srcIndex * valueWidth,
+ dstValueBuffer.memoryAddress() + dstIndex * valueWidth,
+ valueWidth);
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
new file mode 100644
index 000000000..19817fe76
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Basic interface for sorting a vector in-place.
+ * That is, the sorting is performed by modifying the input vector,
+ * without creating a new sorted vector.
+ *
+ * @param <V> the vector type.
+ */
+public interface InPlaceVectorSorter<V extends ValueVector> {
+
+ /**
+ * Sort a vector in-place.
+ * @param vec the vector to sort.
+ * @param comparator the criteria for sort.
+ */
+ void sortInPlace(V vec, VectorValueComparator<V> comparator);
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
new file mode 100644
index 000000000..3072717f4
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import java.util.stream.IntStream;
+
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Sorter for the indices of a vector.
+ * @param <V> vector type.
+ */
+public class IndexSorter<V extends ValueVector> {
+
+ /**
+ * If the number of items is smaller than this threshold, we will use another algorithm to sort the data.
+ */
+ public static final int CHANGE_ALGORITHM_THRESHOLD = 15;
+
+ /**
+ * Comparator for vector indices.
+ */
+ private VectorValueComparator<V> comparator;
+
+ /**
+ * Vector indices to sort.
+ */
+ private IntVector indices;
+
+ /**
+ * Sorts indices, by quick-sort. Suppose the vector is denoted by v.
+ * After calling this method, the following relations hold:
+ * v(indices[0]) <= v(indices[1]) <= ...
+ * @param vector the vector whose indices need to be sorted.
+ * @param indices the vector for storing the sorted indices.
+ * @param comparator the comparator to sort indices.
+ */
+ public void sort(V vector, IntVector indices, VectorValueComparator<V> comparator) {
+ comparator.attachVector(vector);
+
+ this.indices = indices;
+
+ IntStream.range(0, vector.getValueCount()).forEach(i -> indices.set(i, i));
+
+ this.comparator = comparator;
+
+ quickSort();
+ }
+
+ private void quickSort() {
+ try (OffHeapIntStack rangeStack = new OffHeapIntStack(indices.getAllocator())) {
+ rangeStack.push(0);
+ rangeStack.push(indices.getValueCount() - 1);
+
+ while (!rangeStack.isEmpty()) {
+ int high = rangeStack.pop();
+ int low = rangeStack.pop();
+
+ if (low < high) {
+ if (high - low < CHANGE_ALGORITHM_THRESHOLD) {
+ InsertionSorter.insertionSort(indices, low, high, comparator);
+ continue;
+ }
+
+ int mid = partition(low, high, indices, comparator);
+
+ // push the larger part to stack first,
+ // to reduce the required stack size
+ if (high - mid < mid - low) {
+ rangeStack.push(low);
+ rangeStack.push(mid - 1);
+
+ rangeStack.push(mid + 1);
+ rangeStack.push(high);
+ } else {
+ rangeStack.push(mid + 1);
+ rangeStack.push(high);
+
+ rangeStack.push(low);
+ rangeStack.push(mid - 1);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Select the pivot as the median of 3 samples.
+ */
+ static <T extends ValueVector> int choosePivot(
+ int low, int high, IntVector indices, VectorValueComparator<T> comparator) {
+ // we need at least 3 items
+ if (high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD) {
+ return indices.get(low);
+ }
+
+ int mid = low + (high - low) / 2;
+
+ // find the median by at most 3 comparisons
+ int medianIdx;
+ if (comparator.compare(indices.get(low), indices.get(mid)) < 0) {
+ if (comparator.compare(indices.get(mid), indices.get(high)) < 0) {
+ medianIdx = mid;
+ } else {
+ if (comparator.compare(indices.get(low), indices.get(high)) < 0) {
+ medianIdx = high;
+ } else {
+ medianIdx = low;
+ }
+ }
+ } else {
+ if (comparator.compare(indices.get(mid), indices.get(high)) > 0) {
+ medianIdx = mid;
+ } else {
+ if (comparator.compare(indices.get(low), indices.get(high)) < 0) {
+ medianIdx = low;
+ } else {
+ medianIdx = high;
+ }
+ }
+ }
+
+ // move the pivot to the low position, if necessary
+ if (medianIdx != low) {
+ int tmp = indices.get(medianIdx);
+ indices.set(medianIdx, indices.get(low));
+ indices.set(low, tmp);
+ return tmp;
+ } else {
+ return indices.get(low);
+ }
+ }
+
+ /**
+ * Partition a range of values in a vector into two parts, with elements in one part smaller than
+ * elements from the other part. The partition is based on the element indices, so it does
+ * not modify the underlying vector.
+ * @param low the lower bound of the range.
+ * @param high the upper bound of the range.
+ * @param indices vector element indices.
+ * @param comparator criteria for comparison.
+ * @param <T> the vector type.
+ * @return the index of the split point.
+ */
+ public static <T extends ValueVector> int partition(
+ int low, int high, IntVector indices, VectorValueComparator<T> comparator) {
+ int pivotIndex = choosePivot(low, high, indices, comparator);
+
+ while (low < high) {
+ while (low < high && comparator.compare(indices.get(high), pivotIndex) >= 0) {
+ high -= 1;
+ }
+ indices.set(low, indices.get(high));
+
+ while (low < high && comparator.compare(indices.get(low), pivotIndex) <= 0) {
+ low += 1;
+ }
+ indices.set(high, indices.get(low));
+ }
+
+ indices.set(low, pivotIndex);
+ return low;
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java
new file mode 100644
index 000000000..dc12a5fef
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Insertion sorter.
+ */
+class InsertionSorter {
+
+ /**
+ * Sorts the range of a vector by insertion sort.
+ *
+ * @param vector the vector to be sorted.
+ * @param startIdx the start index of the range (inclusive).
+ * @param endIdx the end index of the range (inclusive).
+ * @param buffer an extra buffer with capacity 1 to hold the current key.
+ * @param comparator the criteria for vector element comparison.
+ * @param <V> the vector type.
+ */
+ static <V extends BaseFixedWidthVector> void insertionSort(
+ V vector, int startIdx, int endIdx, VectorValueComparator<V> comparator, V buffer) {
+ comparator.attachVectors(vector, buffer);
+ for (int i = startIdx; i <= endIdx; i++) {
+ buffer.copyFrom(i, 0, vector);
+ int j = i - 1;
+ while (j >= startIdx && comparator.compare(j, 0) > 0) {
+ vector.copyFrom(j, j + 1, vector);
+ j = j - 1;
+ }
+ vector.copyFrom(0, j + 1, buffer);
+ }
+ }
+
+ /**
+ * Sorts the range of vector indices by insertion sort.
+ *
+ * @param indices the vector indices.
+ * @param startIdx the start index of the range (inclusive).
+ * @param endIdx the end index of the range (inclusive).
+ * @param comparator the criteria for vector element comparison.
+ * @param <V> the vector type.
+ */
+ static <V extends ValueVector> void insertionSort(
+ IntVector indices, int startIdx, int endIdx, VectorValueComparator<V> comparator) {
+ for (int i = startIdx; i <= endIdx; i++) {
+ int key = indices.get(i);
+ int j = i - 1;
+ while (j >= startIdx && comparator.compare(indices.get(j), key) > 0) {
+ indices.set(j + 1, indices.get(j));
+ j = j - 1;
+ }
+ indices.set(j + 1, key);
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java
new file mode 100644
index 000000000..df96121f1
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.IntVector;
+
+/**
+ * An off heap implementation of stack with int elements.
+ */
+class OffHeapIntStack implements AutoCloseable {
+
+ private static final int INIT_SIZE = 128;
+
+ private IntVector intVector;
+
+ private int top = 0;
+
+ public OffHeapIntStack(BufferAllocator allocator) {
+ intVector = new IntVector("int stack inner vector", allocator);
+ intVector.allocateNew(INIT_SIZE);
+ intVector.setValueCount(INIT_SIZE);
+ }
+
+ public void push(int value) {
+ if (top == intVector.getValueCount()) {
+ int targetCapacity = intVector.getValueCount() * 2;
+ while (intVector.getValueCapacity() < targetCapacity) {
+ intVector.reAlloc();
+ }
+ intVector.setValueCount(targetCapacity);
+ }
+
+ intVector.set(top++, value);
+ }
+
+ public int pop() {
+ return intVector.get(--top);
+ }
+
+ public int getTop() {
+ return intVector.get(top - 1);
+ }
+
+ public boolean isEmpty() {
+ return top == 0;
+ }
+
+ public int getCount() {
+ return top;
+ }
+
+ @Override
+ public void close() {
+ intVector.close();
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
new file mode 100644
index 000000000..41d6dadc4
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Basic interface for sorting a vector out-of-place.
+ * That is, the sorting is performed on a newly-created vector,
+ * and the original vector is not modified.
+ * @param <V> the vector type.
+ */
+public interface OutOfPlaceVectorSorter<V extends ValueVector> {
+
+ /**
+ * Sort a vector out-of-place.
+ * @param inVec the input vector.
+ * @param outVec the output vector, which has the same size as the input vector.
+ * @param comparator the criteria for sort.
+ */
+ void sortOutOfPlace(V inVec, V outVec, VectorValueComparator<V> comparator);
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java
new file mode 100644
index 000000000..0b0c3bd55
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Stable sorter. It compares values like ordinary comparators.
+ * However, when values are equal, it breaks ties by the value indices.
+ * Therefore, sort algorithms using this comparator always produce
+ * stable sort results.
+ * @param <V> type of the vector.
+ */
+public class StableVectorComparator<V extends ValueVector> extends VectorValueComparator<V> {
+
+ private final VectorValueComparator<V> innerComparator;
+
+ /**
+ * Constructs a stable comparator from a given comparator.
+ * @param innerComparator the comparator to convert to stable comparator..
+ */
+ public StableVectorComparator(VectorValueComparator<V> innerComparator) {
+ this.innerComparator = innerComparator;
+ }
+
+ @Override
+ public void attachVector(V vector) {
+ super.attachVector(vector);
+ innerComparator.attachVector(vector);
+ }
+
+ @Override
+ public void attachVectors(V vector1, V vector2) {
+ Preconditions.checkArgument(vector1 == vector2,
+ "Stable comparator only supports comparing values from the same vector");
+ super.attachVectors(vector1, vector2);
+ innerComparator.attachVectors(vector1, vector2);
+ }
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ int result = innerComparator.compare(index1, index2);
+ return result != 0 ? result : index1 - index2;
+ }
+
+ @Override
+ public VectorValueComparator<V> createNew() {
+ return new StableVectorComparator<V>(innerComparator.createNew());
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
new file mode 100644
index 000000000..62003752e
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.IntVector;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Default sorter for variable-width vectors.
+ * It is an out-of-place sort, with time complexity O(n*log(n)).
+ * @param <V> vector type.
+ */
+public class VariableWidthOutOfPlaceVectorSorter<V extends BaseVariableWidthVector>
+ implements OutOfPlaceVectorSorter<V> {
+
+ protected IndexSorter<V> indexSorter = new IndexSorter<>();
+
+ @Override
+ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator<V> comparator) {
+ comparator.attachVector(srcVector);
+
+ // buffers referenced in the sort
+ ArrowBuf srcValueBuffer = srcVector.getDataBuffer();
+ ArrowBuf srcOffsetBuffer = srcVector.getOffsetBuffer();
+ ArrowBuf dstValidityBuffer = dstVector.getValidityBuffer();
+ ArrowBuf dstValueBuffer = dstVector.getDataBuffer();
+ ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer();
+
+ // check buffer size
+ Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
+ "Not enough capacity for the validity buffer of the dst vector. " +
+ "Expected capacity %s, actual capacity %s",
+ (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity());
+ Preconditions.checkArgument(
+ dstOffsetBuffer.capacity() >= (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH,
+ "Not enough capacity for the offset buffer of the dst vector. " +
+ "Expected capacity %s, actual capacity %s",
+ (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffsetBuffer.capacity());
+ long dataSize = srcVector.getOffsetBuffer().getInt(
+ srcVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+ Preconditions.checkArgument(
+ dstValueBuffer.capacity() >= dataSize, "No enough capacity for the data buffer of the dst vector. " +
+ "Expected capacity %s, actual capacity %s", dataSize, dstValueBuffer.capacity());
+
+ // sort value indices
+ try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
+ sortedIndices.allocateNew(srcVector.getValueCount());
+ sortedIndices.setValueCount(srcVector.getValueCount());
+ indexSorter.sort(srcVector, sortedIndices, comparator);
+
+ int dstOffset = 0;
+ dstOffsetBuffer.setInt(0, 0);
+
+ // copy sorted values to the output vector
+ for (int dstIndex = 0; dstIndex < sortedIndices.getValueCount(); dstIndex++) {
+ int srcIndex = sortedIndices.get(dstIndex);
+ if (srcVector.isNull(srcIndex)) {
+ BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex);
+ } else {
+ BitVectorHelper.setBit(dstValidityBuffer, dstIndex);
+ int srcOffset = srcOffsetBuffer.getInt(srcIndex * BaseVariableWidthVector.OFFSET_WIDTH);
+ int valueLength = srcOffsetBuffer.getInt((srcIndex + 1) * BaseVariableWidthVector.OFFSET_WIDTH) - srcOffset;
+ PlatformDependent.copyMemory(
+ srcValueBuffer.memoryAddress() + srcOffset,
+ dstValueBuffer.memoryAddress() + dstOffset,
+ valueLength);
+ dstOffset += valueLength;
+ }
+ dstOffsetBuffer.setInt((dstIndex + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffset);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
new file mode 100644
index 000000000..ed32e16ca
--- /dev/null
+++ b/src/arrow/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Compare two values at the given indices in the vectors.
+ * This is used for vector sorting.
+ * @param <V> type of the vector.
+ */
+public abstract class VectorValueComparator<V extends ValueVector> {
+
+ /**
+ * The first vector to compare.
+ */
+ protected V vector1;
+
+ /**
+ * The second vector to compare.
+ */
+ protected V vector2;
+
+ /**
+ * Width of the vector value. For variable-length vectors, this value makes no sense.
+ */
+ protected int valueWidth;
+
+ /**
+ * Constructor for variable-width vectors.
+ */
+ protected VectorValueComparator() {
+
+ }
+
+ /**
+ * Constructor for fixed-width vectors.
+ * @param valueWidth the record width (in bytes).
+ */
+ protected VectorValueComparator(int valueWidth) {
+ this.valueWidth = valueWidth;
+ }
+
+ public int getValueWidth() {
+ return valueWidth;
+ }
+
+ /**
+ * Attach both vectors to compare to the same input vector.
+ * @param vector the vector to attach.
+ */
+ public void attachVector(V vector) {
+ attachVectors(vector, vector);
+ }
+
+ /**
+ * Attach vectors to compare.
+ * @param vector1 the first vector to compare.
+ * @param vector2 the second vector to compare.
+ */
+ public void attachVectors(V vector1, V vector2) {
+ this.vector1 = vector1;
+ this.vector2 = vector2;
+ }
+
+ /**
+ * Compare two values, given their indices.
+ * @param index1 index of the first value to compare.
+ * @param index2 index of the second value to compare.
+ * @return an integer greater than 0, if the first value is greater;
+ * an integer smaller than 0, if the first value is smaller; or 0, if both
+ * values are equal.
+ */
+ public int compare(int index1, int index2) {
+ boolean isNull1 = vector1.isNull(index1);
+ boolean isNull2 = vector2.isNull(index2);
+
+ if (isNull1 || isNull2) {
+ if (isNull1 && isNull2) {
+ return 0;
+ } else if (isNull1) {
+ // null is smaller
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ return compareNotNull(index1, index2);
+ }
+
+ /**
+ * Compare two values, given their indices.
+ * This is a fast path for comparing non-null values, so the caller
+ * must make sure that values at both indices are not null.
+ * @param index1 index of the first value to compare.
+ * @param index2 index of the second value to compare.
+ * @return an integer greater than 0, if the first value is greater;
+ * an integer smaller than 0, if the first value is smaller; or 0, if both
+ * values are equal.
+ */
+ public abstract int compareNotNull(int index1, int index2);
+
+ /**
+ * Creates a comparator of the same type.
+ * @return the newly created comparator.
+ */
+ public abstract VectorValueComparator<V> createNew();
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
new file mode 100644
index 000000000..def83fba7
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.deduplicate;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link DeduplicationUtils}.
+ */
+public class TestDeduplicationUtils {
+
+ private static final int VECTOR_LENGTH = 100;
+
+ private static final int REPETITION_COUNT = 3;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testDeduplicateFixedWidth() {
+ try (IntVector origVec = new IntVector("original vec", allocator);
+ IntVector dedupVec = new IntVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ ArrowBuf distinctBuf = allocator.buffer(
+ DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
+ origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT);
+ origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
+ lengthVec.allocateNew();
+
+ // prepare data
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ for (int j = 0; j < REPETITION_COUNT; j++) {
+ origVec.set(i * REPETITION_COUNT + j, i);
+ }
+ }
+
+ DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf);
+ assertEquals( VECTOR_LENGTH,
+ VECTOR_LENGTH * REPETITION_COUNT -
+ BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
+
+ DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec);
+ assertEquals(VECTOR_LENGTH, dedupVec.getValueCount());
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertEquals(i, dedupVec.get(i));
+ }
+
+ DeduplicationUtils.populateRunLengths(distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
+ assertEquals(VECTOR_LENGTH, lengthVec.getValueCount());
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertEquals(REPETITION_COUNT, lengthVec.get(i));
+ }
+ }
+ }
+
+ @Test
+ public void testDeduplicateVariableWidth() {
+ try (VarCharVector origVec = new VarCharVector("original vec", allocator);
+ VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ ArrowBuf distinctBuf = allocator.buffer(
+ DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
+ origVec.allocateNew(
+ VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
+ origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
+ lengthVec.allocateNew();
+
+ // prepare data
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ String str = String.valueOf(i * i);
+ for (int j = 0; j < REPETITION_COUNT; j++) {
+ origVec.set(i * REPETITION_COUNT + j, str.getBytes());
+ }
+ }
+
+ DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf);
+ assertEquals(VECTOR_LENGTH,
+ VECTOR_LENGTH * REPETITION_COUNT -
+ BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
+
+ DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec);
+ assertEquals(VECTOR_LENGTH, dedupVec.getValueCount());
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertArrayEquals(String.valueOf(i * i).getBytes(), dedupVec.get(i));
+ }
+
+ DeduplicationUtils.populateRunLengths(
+ distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
+ assertEquals(VECTOR_LENGTH, lengthVec.getValueCount());
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertEquals(REPETITION_COUNT, lengthVec.get(i));
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
new file mode 100644
index 000000000..4bfa6e255
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.deduplicate;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link VectorRunDeduplicator}.
+ */
+public class TestVectorRunDeduplicator {
+
+ private static final int VECTOR_LENGTH = 100;
+
+ private static final int REPETITION_COUNT = 3;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testDeduplicateFixedWidth() {
+ try (IntVector origVec = new IntVector("original vec", allocator);
+ IntVector dedupVec = new IntVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ VectorRunDeduplicator<IntVector> deduplicator =
+ new VectorRunDeduplicator<>(origVec, allocator)) {
+ origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT);
+ origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
+ lengthVec.allocateNew();
+
+ // prepare data
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ for (int j = 0; j < REPETITION_COUNT; j++) {
+ origVec.set(i * REPETITION_COUNT + j, i);
+ }
+ }
+
+ int distinctCount = deduplicator.getRunCount();
+ assertEquals(VECTOR_LENGTH, distinctCount);
+
+ dedupVec.allocateNew(distinctCount);
+
+ deduplicator.populateDeduplicatedValues(dedupVec);
+ assertEquals(VECTOR_LENGTH, dedupVec.getValueCount());
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertEquals(i, dedupVec.get(i));
+ }
+
+ deduplicator.populateRunLengths(lengthVec);
+ assertEquals(VECTOR_LENGTH, lengthVec.getValueCount());
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertEquals(REPETITION_COUNT, lengthVec.get(i));
+ }
+ }
+ }
+
+ @Test
+ public void testDeduplicateVariableWidth() {
+ try (VarCharVector origVec = new VarCharVector("original vec", allocator);
+ VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ VectorRunDeduplicator<VarCharVector> deduplicator =
+ new VectorRunDeduplicator<>(origVec, allocator)) {
+ origVec.allocateNew(
+ VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
+ origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
+ lengthVec.allocateNew();
+
+ // prepare data
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ String str = String.valueOf(i * i);
+ for (int j = 0; j < REPETITION_COUNT; j++) {
+ origVec.set(i * REPETITION_COUNT + j, str.getBytes());
+ }
+ }
+
+ int distinctCount = deduplicator.getRunCount();
+ assertEquals(VECTOR_LENGTH, distinctCount);
+
+ dedupVec.allocateNew(distinctCount * 10, distinctCount);
+
+ deduplicator.populateDeduplicatedValues(dedupVec);
+ assertEquals(VECTOR_LENGTH, dedupVec.getValueCount());
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertArrayEquals(String.valueOf(i * i).getBytes(), dedupVec.get(i));
+ }
+
+ deduplicator.populateRunLengths(lengthVec);
+ assertEquals(VECTOR_LENGTH, lengthVec.getValueCount());
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertEquals(REPETITION_COUNT, lengthVec.get(i));
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java
new file mode 100644
index 000000000..0a3314535
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.assertNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link HashTableBasedDictionaryBuilder}.
+ */
+public class TestHashTableBasedDictionaryBuilder {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testBuildVariableWidthDictionaryWithNull() {
+ try (VarCharVector vec = new VarCharVector("", allocator);
+ VarCharVector dictionary = new VarCharVector("", allocator)) {
+
+ vec.allocateNew(100, 10);
+ vec.setValueCount(10);
+
+ dictionary.allocateNew();
+
+ // fill data
+ vec.set(0, "hello".getBytes());
+ vec.set(1, "abc".getBytes());
+ vec.setNull(2);
+ vec.set(3, "world".getBytes());
+ vec.set(4, "12".getBytes());
+ vec.set(5, "dictionary".getBytes());
+ vec.setNull(6);
+ vec.set(7, "hello".getBytes());
+ vec.set(8, "good".getBytes());
+ vec.set(9, "abc".getBytes());
+
+ HashTableBasedDictionaryBuilder<VarCharVector> dictionaryBuilder =
+ new HashTableBasedDictionaryBuilder<>(dictionary, true);
+
+ int result = dictionaryBuilder.addValues(vec);
+
+ assertEquals(7, result);
+ assertEquals(7, dictionary.getValueCount());
+
+ assertEquals("hello", new String(dictionary.get(0)));
+ assertEquals("abc", new String(dictionary.get(1)));
+ assertNull(dictionary.get(2));
+ assertEquals("world", new String(dictionary.get(3)));
+ assertEquals("12", new String(dictionary.get(4)));
+ assertEquals("dictionary", new String(dictionary.get(5)));
+ assertEquals("good", new String(dictionary.get(6)));
+ }
+ }
+
+ @Test
+ public void testBuildVariableWidthDictionaryWithoutNull() {
+ try (VarCharVector vec = new VarCharVector("", allocator);
+ VarCharVector dictionary = new VarCharVector("", allocator)) {
+
+ vec.allocateNew(100, 10);
+ vec.setValueCount(10);
+
+ dictionary.allocateNew();
+
+ // fill data
+ vec.set(0, "hello".getBytes());
+ vec.set(1, "abc".getBytes());
+ vec.setNull(2);
+ vec.set(3, "world".getBytes());
+ vec.set(4, "12".getBytes());
+ vec.set(5, "dictionary".getBytes());
+ vec.setNull(6);
+ vec.set(7, "hello".getBytes());
+ vec.set(8, "good".getBytes());
+ vec.set(9, "abc".getBytes());
+
+ HashTableBasedDictionaryBuilder<VarCharVector> dictionaryBuilder =
+ new HashTableBasedDictionaryBuilder<>(dictionary, false);
+
+ int result = dictionaryBuilder.addValues(vec);
+
+ assertEquals(6, result);
+ assertEquals(6, dictionary.getValueCount());
+
+ assertEquals("hello", new String(dictionary.get(0)));
+ assertEquals("abc", new String(dictionary.get(1)));
+ assertEquals("world", new String(dictionary.get(2)));
+ assertEquals("12", new String(dictionary.get(3)));
+ assertEquals("dictionary", new String(dictionary.get(4)));
+ assertEquals("good", new String(dictionary.get(5)));
+
+ }
+ }
+
+ @Test
+ public void testBuildFixedWidthDictionaryWithNull() {
+ try (IntVector vec = new IntVector("", allocator);
+ IntVector dictionary = new IntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ dictionary.allocateNew();
+
+ // fill data
+ vec.set(0, 4);
+ vec.set(1, 8);
+ vec.set(2, 32);
+ vec.set(3, 8);
+ vec.set(4, 16);
+ vec.set(5, 32);
+ vec.setNull(6);
+ vec.set(7, 4);
+ vec.set(8, 4);
+ vec.setNull(9);
+
+ HashTableBasedDictionaryBuilder<IntVector> dictionaryBuilder =
+ new HashTableBasedDictionaryBuilder<>(dictionary, true);
+
+ int result = dictionaryBuilder.addValues(vec);
+
+ assertEquals(5, result);
+ assertEquals(5, dictionary.getValueCount());
+
+ assertEquals(4, dictionary.get(0));
+ assertEquals(8, dictionary.get(1));
+ assertEquals(32, dictionary.get(2));
+ assertEquals(16, dictionary.get(3));
+ assertTrue(dictionary.isNull(4));
+ }
+ }
+
+ @Test
+ public void testBuildFixedWidthDictionaryWithoutNull() {
+ try (IntVector vec = new IntVector("", allocator);
+ IntVector dictionary = new IntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ dictionary.allocateNew();
+
+ // fill data
+ vec.set(0, 4);
+ vec.set(1, 8);
+ vec.set(2, 32);
+ vec.set(3, 8);
+ vec.set(4, 16);
+ vec.set(5, 32);
+ vec.setNull(6);
+ vec.set(7, 4);
+ vec.set(8, 4);
+ vec.setNull(9);
+
+ HashTableBasedDictionaryBuilder<IntVector> dictionaryBuilder =
+ new HashTableBasedDictionaryBuilder<>(dictionary, false);
+
+ int result = dictionaryBuilder.addValues(vec);
+
+ assertEquals(4, result);
+ assertEquals(4, dictionary.getValueCount());
+
+ assertEquals(4, dictionary.get(0));
+ assertEquals(8, dictionary.get(1));
+ assertEquals(32, dictionary.get(2));
+ assertEquals(16, dictionary.get(3));
+
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
new file mode 100644
index 000000000..dd22ac96f
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link HashTableDictionaryEncoder}.
+ */
+public class TestHashTableDictionaryEncoder {
+
+ private final int VECTOR_LENGTH = 50;
+
+ private final int DICTIONARY_LENGTH = 10;
+
+ private BufferAllocator allocator;
+
+ byte[] zero = "000".getBytes(StandardCharsets.UTF_8);
+ byte[] one = "111".getBytes(StandardCharsets.UTF_8);
+ byte[] two = "222".getBytes(StandardCharsets.UTF_8);
+
+ byte[][] data = new byte[][]{zero, one, two};
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testEncodeAndDecode() {
+ Random random = new Random();
+ try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+ // set up dictionary
+ dictionary.allocateNew();
+ for (int i = 0; i < DICTIONARY_LENGTH; i++) {
+ // encode "i" as i
+ dictionary.setSafe(i, String.valueOf(i).getBytes());
+ }
+ dictionary.setValueCount(DICTIONARY_LENGTH);
+
+ // set up raw vector
+ rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ int val = (random.nextInt() & Integer.MAX_VALUE) % DICTIONARY_LENGTH;
+ rawVector.set(i, String.valueOf(val).getBytes());
+ }
+ rawVector.setValueCount(VECTOR_LENGTH);
+
+ HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new HashTableDictionaryEncoder<>(dictionary, false);
+
+ // perform encoding
+ encodedVector.allocateNew();
+ encoder.encode(rawVector, encodedVector);
+
+ // verify encoding results
+ assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes());
+ }
+
+ // perform decoding
+ Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+
+ // verify decoding results
+ assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(), decodedVector.get(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeAndDecodeWithNull() {
+ Random random = new Random();
+ try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+ // set up dictionary
+ dictionary.allocateNew();
+ dictionary.setNull(0);
+ for (int i = 1; i < DICTIONARY_LENGTH; i++) {
+ // encode "i" as i
+ dictionary.setSafe(i, String.valueOf(i).getBytes());
+ }
+ dictionary.setValueCount(DICTIONARY_LENGTH);
+
+ // set up raw vector
+ rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 10 == 0) {
+ rawVector.setNull(i);
+ } else {
+ int val = (random.nextInt() & Integer.MAX_VALUE) % (DICTIONARY_LENGTH - 1) + 1;
+ rawVector.set(i, String.valueOf(val).getBytes());
+ }
+ }
+ rawVector.setValueCount(VECTOR_LENGTH);
+
+ HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new HashTableDictionaryEncoder<>(dictionary, true);
+
+ // perform encoding
+ encodedVector.allocateNew();
+ encoder.encode(rawVector, encodedVector);
+
+ // verify encoding results
+ assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 10 == 0) {
+ assertEquals(0, encodedVector.get(i));
+ } else {
+ assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes());
+ }
+ }
+
+ // perform decoding
+ Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ // verify decoding results
+ assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 10 == 0) {
+ assertTrue(decodedVector.isNull(i));
+ } else {
+ assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(), decodedVector.get(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeNullWithoutNullInDictionary() {
+ try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+ // set up dictionary, with no null in it.
+ dictionary.allocateNew();
+ for (int i = 0; i < DICTIONARY_LENGTH; i++) {
+ // encode "i" as i
+ dictionary.setSafe(i, String.valueOf(i).getBytes());
+ }
+ dictionary.setValueCount(DICTIONARY_LENGTH);
+
+ // the vector to encode has a null inside.
+ rawVector.allocateNew(1);
+ rawVector.setNull(0);
+ rawVector.setValueCount(1);
+
+ encodedVector.allocateNew();
+
+ HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new HashTableDictionaryEncoder<>(dictionary, true);
+
+ // the encoder should encode null, but no null in the dictionary,
+ // so an exception should be thrown.
+ assertThrows(IllegalArgumentException.class, () -> {
+ encoder.encode(rawVector, encodedVector);
+ });
+ }
+ }
+
+ @Test
+ public void testEncodeStrings() {
+ // Create a new value vector
+ try (final VarCharVector vector = new VarCharVector("foo", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+
+ vector.allocateNew(512, 5);
+ encoded.allocateNew();
+
+ // set some values
+ vector.setSafe(0, zero, 0, zero.length);
+ vector.setSafe(1, one, 0, one.length);
+ vector.setSafe(2, one, 0, one.length);
+ vector.setSafe(3, two, 0, two.length);
+ vector.setSafe(4, zero, 0, zero.length);
+ vector.setValueCount(5);
+
+ // set some dictionary values
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, one.length);
+ dictionaryVector.setSafe(1, one, 0, two.length);
+ dictionaryVector.setSafe(2, two, 0, zero.length);
+ dictionaryVector.setValueCount(3);
+
+ HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new HashTableDictionaryEncoder<>(dictionaryVector);
+ encoder.encode(vector, encoded);
+
+ // verify indices
+ assertEquals(5, encoded.getValueCount());
+ assertEquals(0, encoded.get(0));
+ assertEquals(1, encoded.get(1));
+ assertEquals(1, encoded.get(2));
+ assertEquals(2, encoded.get(3));
+ assertEquals(0, encoded.get(4));
+
+ // now run through the decoder and verify we get the original back
+ Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) {
+
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeLargeVector() {
+ // Create a new value vector
+ try (final VarCharVector vector = new VarCharVector("foo", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ vector.allocateNew();
+ encoded.allocateNew();
+
+ int count = 10000;
+
+ for (int i = 0; i < 10000; ++i) {
+ vector.setSafe(i, data[i % 3], 0, data[i % 3].length);
+ }
+ vector.setValueCount(count);
+
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, one.length);
+ dictionaryVector.setSafe(1, one, 0, two.length);
+ dictionaryVector.setSafe(2, two, 0, zero.length);
+ dictionaryVector.setValueCount(3);
+
+ HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new HashTableDictionaryEncoder<>(dictionaryVector);
+ encoder.encode(vector, encoded);
+
+ assertEquals(count, encoded.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(i % 3, encoded.get(i));
+ }
+
+ // now run through the decoder and verify we get the original back
+ Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeBinaryVector() {
+ // Create a new value vector
+ try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
+ final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator)) {
+ vector.allocateNew(512, 5);
+ vector.allocateNew();
+ encoded.allocateNew();
+
+ // set some values
+ vector.setSafe(0, zero, 0, zero.length);
+ vector.setSafe(1, one, 0, one.length);
+ vector.setSafe(2, one, 0, one.length);
+ vector.setSafe(3, two, 0, two.length);
+ vector.setSafe(4, zero, 0, zero.length);
+ vector.setValueCount(5);
+
+ // set some dictionary values
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, one.length);
+ dictionaryVector.setSafe(1, one, 0, two.length);
+ dictionaryVector.setSafe(2, two, 0, zero.length);
+ dictionaryVector.setValueCount(3);
+
+ HashTableDictionaryEncoder<IntVector, VarBinaryVector> encoder =
+ new HashTableDictionaryEncoder<>(dictionaryVector);
+ encoder.encode(vector, encoded);
+
+ assertEquals(5, encoded.getValueCount());
+ assertEquals(0, encoded.get(0));
+ assertEquals(1, encoded.get(1));
+ assertEquals(1, encoded.get(2));
+ assertEquals(2, encoded.get(3));
+ assertEquals(0, encoded.get(4));
+
+ // now run through the decoder and verify we get the original back
+ Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dict)) {
+
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i)));
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java
new file mode 100644
index 000000000..104d1b35b
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link LinearDictionaryEncoder}.
+ */
+public class TestLinearDictionaryEncoder {
+
+ private final int VECTOR_LENGTH = 50;
+
+ private final int DICTIONARY_LENGTH = 10;
+
+ private BufferAllocator allocator;
+
+ byte[] zero = "000".getBytes(StandardCharsets.UTF_8);
+ byte[] one = "111".getBytes(StandardCharsets.UTF_8);
+ byte[] two = "222".getBytes(StandardCharsets.UTF_8);
+
+ byte[][] data = new byte[][]{zero, one, two};
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testEncodeAndDecode() {
+ Random random = new Random();
+ try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+ // set up dictionary
+ dictionary.allocateNew();
+ for (int i = 0; i < DICTIONARY_LENGTH; i++) {
+ // encode "i" as i
+ dictionary.setSafe(i, String.valueOf(i).getBytes());
+ }
+ dictionary.setValueCount(DICTIONARY_LENGTH);
+
+ // set up raw vector
+ rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ int val = (random.nextInt() & Integer.MAX_VALUE) % DICTIONARY_LENGTH;
+ rawVector.set(i, String.valueOf(val).getBytes());
+ }
+ rawVector.setValueCount(VECTOR_LENGTH);
+
+ LinearDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new LinearDictionaryEncoder<>(dictionary, false);
+
+ // perform encoding
+ encodedVector.allocateNew();
+ encoder.encode(rawVector, encodedVector);
+
+ // verify encoding results
+ assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes());
+ }
+
+ // perform decoding
+ Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+
+ // verify decoding results
+ assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(), decodedVector.get(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeAndDecodeWithNull() {
+ Random random = new Random();
+ try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+ // set up dictionary
+ dictionary.allocateNew();
+ dictionary.setNull(0);
+ for (int i = 1; i < DICTIONARY_LENGTH; i++) {
+ // encode "i" as i
+ dictionary.setSafe(i, String.valueOf(i).getBytes());
+ }
+ dictionary.setValueCount(DICTIONARY_LENGTH);
+
+ // set up raw vector
+ rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 10 == 0) {
+ rawVector.setNull(i);
+ } else {
+ int val = (random.nextInt() & Integer.MAX_VALUE) % (DICTIONARY_LENGTH - 1) + 1;
+ rawVector.set(i, String.valueOf(val).getBytes());
+ }
+ }
+ rawVector.setValueCount(VECTOR_LENGTH);
+
+ LinearDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new LinearDictionaryEncoder<>(dictionary, true);
+
+ // perform encoding
+ encodedVector.allocateNew();
+ encoder.encode(rawVector, encodedVector);
+
+ // verify encoding results
+ assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 10 == 0) {
+ assertEquals(0, encodedVector.get(i));
+ } else {
+ assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes());
+ }
+ }
+
+ // perform decoding
+ Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+
+ // verify decoding results
+ assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 10 == 0) {
+ assertTrue(decodedVector.isNull(i));
+ } else {
+ assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(), decodedVector.get(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeNullWithoutNullInDictionary() {
+ try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+ // set up dictionary, with no null in it.
+ dictionary.allocateNew();
+ for (int i = 0; i < DICTIONARY_LENGTH; i++) {
+ // encode "i" as i
+ dictionary.setSafe(i, String.valueOf(i).getBytes());
+ }
+ dictionary.setValueCount(DICTIONARY_LENGTH);
+
+ // the vector to encode has a null inside.
+ rawVector.allocateNew(1);
+ rawVector.setNull(0);
+ rawVector.setValueCount(1);
+
+ encodedVector.allocateNew();
+
+ LinearDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new LinearDictionaryEncoder<>(dictionary, true);
+
+ // the encoder should encode null, but no null in the dictionary,
+ // so an exception should be thrown.
+ assertThrows(IllegalArgumentException.class, () -> {
+ encoder.encode(rawVector, encodedVector);
+ });
+ }
+ }
+
+ @Test
+ public void testEncodeStrings() {
+ // Create a new value vector
+ try (final VarCharVector vector = new VarCharVector("foo", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+
+ vector.allocateNew(512, 5);
+ encoded.allocateNew();
+
+ // set some values
+ vector.setSafe(0, zero, 0, zero.length);
+ vector.setSafe(1, one, 0, one.length);
+ vector.setSafe(2, one, 0, one.length);
+ vector.setSafe(3, two, 0, two.length);
+ vector.setSafe(4, zero, 0, zero.length);
+ vector.setValueCount(5);
+
+ // set some dictionary values
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, one.length);
+ dictionaryVector.setSafe(1, one, 0, two.length);
+ dictionaryVector.setSafe(2, two, 0, zero.length);
+ dictionaryVector.setValueCount(3);
+
+ LinearDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new LinearDictionaryEncoder<>(dictionaryVector);
+ encoder.encode(vector, encoded);
+
+ // verify indices
+ assertEquals(5, encoded.getValueCount());
+ assertEquals(0, encoded.get(0));
+ assertEquals(1, encoded.get(1));
+ assertEquals(1, encoded.get(2));
+ assertEquals(2, encoded.get(3));
+ assertEquals(0, encoded.get(4));
+
+ // now run through the decoder and verify we get the original back
+ Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) {
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeLargeVector() {
+ // Create a new value vector
+ try (final VarCharVector vector = new VarCharVector("foo", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ vector.allocateNew();
+ encoded.allocateNew();
+
+ int count = 10000;
+
+ for (int i = 0; i < 10000; ++i) {
+ vector.setSafe(i, data[i % 3], 0, data[i % 3].length);
+ }
+ vector.setValueCount(count);
+
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, one.length);
+ dictionaryVector.setSafe(1, one, 0, two.length);
+ dictionaryVector.setSafe(2, two, 0, zero.length);
+ dictionaryVector.setValueCount(3);
+
+ LinearDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new LinearDictionaryEncoder<>(dictionaryVector);
+ encoder.encode(vector, encoded);
+
+ assertEquals(count, encoded.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(i % 3, encoded.get(i));
+ }
+
+ // now run through the decoder and verify we get the original back
+ Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeBinaryVector() {
+ // Create a new value vector
+ try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
+ final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator)) {
+ vector.allocateNew(512, 5);
+ vector.allocateNew();
+ encoded.allocateNew();
+
+ // set some values
+ vector.setSafe(0, zero, 0, zero.length);
+ vector.setSafe(1, one, 0, one.length);
+ vector.setSafe(2, one, 0, one.length);
+ vector.setSafe(3, two, 0, two.length);
+ vector.setSafe(4, zero, 0, zero.length);
+ vector.setValueCount(5);
+
+ // set some dictionary values
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, one.length);
+ dictionaryVector.setSafe(1, one, 0, two.length);
+ dictionaryVector.setSafe(2, two, 0, zero.length);
+ dictionaryVector.setValueCount(3);
+
+ LinearDictionaryEncoder<IntVector, VarBinaryVector> encoder =
+ new LinearDictionaryEncoder<>(dictionaryVector);
+ encoder.encode(vector, encoded);
+
+ assertEquals(5, encoded.getValueCount());
+ assertEquals(0, encoded.get(0));
+ assertEquals(1, encoded.get(1));
+ assertEquals(1, encoded.get(2));
+ assertEquals(2, encoded.get(3));
+ assertEquals(0, encoded.get(4));
+
+ // now run through the decoder and verify we get the original back
+ Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dict)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ Assert.assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i)));
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java
new file mode 100644
index 000000000..a156e987c
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link SearchDictionaryEncoder}.
+ */
+public class TestSearchDictionaryEncoder {
+
+ private final int VECTOR_LENGTH = 50;
+
+ private final int DICTIONARY_LENGTH = 10;
+
+ private BufferAllocator allocator;
+
+ byte[] zero = "000".getBytes(StandardCharsets.UTF_8);
+ byte[] one = "111".getBytes(StandardCharsets.UTF_8);
+ byte[] two = "222".getBytes(StandardCharsets.UTF_8);
+
+ byte[][] data = new byte[][]{zero, one, two};
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testEncodeAndDecode() {
+ Random random = new Random();
+ try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+ // set up dictionary
+ dictionary.allocateNew();
+ for (int i = 0; i < DICTIONARY_LENGTH; i++) {
+ // encode "i" as i
+ dictionary.setSafe(i, String.valueOf(i).getBytes());
+ }
+ dictionary.setValueCount(DICTIONARY_LENGTH);
+
+ // set up raw vector
+ rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ int val = (random.nextInt() & Integer.MAX_VALUE) % DICTIONARY_LENGTH;
+ rawVector.set(i, String.valueOf(val).getBytes());
+ }
+ rawVector.setValueCount(VECTOR_LENGTH);
+
+ SearchDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new SearchDictionaryEncoder<>(
+ dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false);
+
+ // perform encoding
+ encodedVector.allocateNew();
+ encoder.encode(rawVector, encodedVector);
+
+ // verify encoding results
+ assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes());
+ }
+
+ // perform decoding
+ Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+
+ // verify decoding results
+ assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(), decodedVector.get(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeAndDecodeWithNull() {
+ Random random = new Random();
+ try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+ // set up dictionary
+ dictionary.allocateNew();
+ dictionary.setNull(0);
+ for (int i = 1; i < DICTIONARY_LENGTH; i++) {
+ // encode "i" as i
+ dictionary.setSafe(i, String.valueOf(i).getBytes());
+ }
+ dictionary.setValueCount(DICTIONARY_LENGTH);
+
+ // set up raw vector
+ rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 10 == 0) {
+ rawVector.setNull(i);
+ } else {
+ int val = (random.nextInt() & Integer.MAX_VALUE) % (DICTIONARY_LENGTH - 1) + 1;
+ rawVector.set(i, String.valueOf(val).getBytes());
+ }
+ }
+ rawVector.setValueCount(VECTOR_LENGTH);
+
+ SearchDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new SearchDictionaryEncoder<>(
+ dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
+
+ // perform encoding
+ encodedVector.allocateNew();
+ encoder.encode(rawVector, encodedVector);
+
+ // verify encoding results
+ assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 10 == 0) {
+ assertEquals(0, encodedVector.get(i));
+ } else {
+ assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes());
+ }
+ }
+
+ // perform decoding
+ Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+
+ // verify decoding results
+ assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 10 == 0) {
+ assertTrue(decodedVector.isNull(i));
+ } else {
+ assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(), decodedVector.get(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeNullWithoutNullInDictionary() {
+ try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+ // set up dictionary, with no null in it.
+ dictionary.allocateNew();
+ for (int i = 0; i < DICTIONARY_LENGTH; i++) {
+ // encode "i" as i
+ dictionary.setSafe(i, String.valueOf(i).getBytes());
+ }
+ dictionary.setValueCount(DICTIONARY_LENGTH);
+
+ // the vector to encode has a null inside.
+ rawVector.allocateNew(1);
+ rawVector.setNull(0);
+ rawVector.setValueCount(1);
+
+ encodedVector.allocateNew();
+
+ SearchDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new SearchDictionaryEncoder<>(
+ dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
+
+ // the encoder should encode null, but no null in the dictionary,
+ // so an exception should be thrown.
+ assertThrows(IllegalArgumentException.class, () -> {
+ encoder.encode(rawVector, encodedVector);
+ });
+ }
+ }
+
+ @Test
+ public void testEncodeStrings() {
+ // Create a new value vector
+ try (final VarCharVector vector = new VarCharVector("foo", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+
+ vector.allocateNew(512, 5);
+ encoded.allocateNew();
+
+ // set some values
+ vector.setSafe(0, zero, 0, zero.length);
+ vector.setSafe(1, one, 0, one.length);
+ vector.setSafe(2, one, 0, one.length);
+ vector.setSafe(3, two, 0, two.length);
+ vector.setSafe(4, zero, 0, zero.length);
+ vector.setValueCount(5);
+
+ // set some dictionary values
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, one.length);
+ dictionaryVector.setSafe(1, one, 0, two.length);
+ dictionaryVector.setSafe(2, two, 0, zero.length);
+ dictionaryVector.setValueCount(3);
+
+ SearchDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new SearchDictionaryEncoder<>(
+ dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
+ encoder.encode(vector, encoded);
+
+ // verify indices
+ assertEquals(5, encoded.getValueCount());
+ assertEquals(0, encoded.get(0));
+ assertEquals(1, encoded.get(1));
+ assertEquals(1, encoded.get(2));
+ assertEquals(2, encoded.get(3));
+ assertEquals(0, encoded.get(4));
+
+ // now run through the decoder and verify we get the original back
+ Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) {
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeLargeVector() {
+ // Create a new value vector
+ try (final VarCharVector vector = new VarCharVector("foo", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ vector.allocateNew();
+ encoded.allocateNew();
+
+ int count = 10000;
+
+ for (int i = 0; i < 10000; ++i) {
+ vector.setSafe(i, data[i % 3], 0, data[i % 3].length);
+ }
+ vector.setValueCount(count);
+
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, one.length);
+ dictionaryVector.setSafe(1, one, 0, two.length);
+ dictionaryVector.setSafe(2, two, 0, zero.length);
+ dictionaryVector.setValueCount(3);
+
+ SearchDictionaryEncoder<IntVector, VarCharVector> encoder =
+ new SearchDictionaryEncoder<>(
+ dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
+ encoder.encode(vector, encoded);
+
+ assertEquals(count, encoded.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(i % 3, encoded.get(i));
+ }
+
+ // now run through the decoder and verify we get the original back
+ Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeBinaryVector() {
+ // Create a new value vector
+ try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
+ final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator)) {
+ vector.allocateNew(512, 5);
+ vector.allocateNew();
+ encoded.allocateNew();
+
+ // set some values
+ vector.setSafe(0, zero, 0, zero.length);
+ vector.setSafe(1, one, 0, one.length);
+ vector.setSafe(2, one, 0, one.length);
+ vector.setSafe(3, two, 0, two.length);
+ vector.setSafe(4, zero, 0, zero.length);
+ vector.setValueCount(5);
+
+ // set some dictionary values
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, one.length);
+ dictionaryVector.setSafe(1, one, 0, two.length);
+ dictionaryVector.setSafe(2, two, 0, zero.length);
+ dictionaryVector.setValueCount(3);
+
+ SearchDictionaryEncoder<IntVector, VarBinaryVector> encoder =
+ new SearchDictionaryEncoder<>(
+ dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
+ encoder.encode(vector, encoded);
+
+ assertEquals(5, encoded.getValueCount());
+ assertEquals(0, encoded.get(0));
+ assertEquals(1, encoded.get(1));
+ assertEquals(1, encoded.get(2));
+ assertEquals(2, encoded.get(3));
+ assertEquals(0, encoded.get(4));
+
+ // now run through the decoder and verify we get the original back
+ Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dict)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ Assert.assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i)));
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java
new file mode 100644
index 000000000..d8e9edce8
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link SearchTreeBasedDictionaryBuilder}.
+ */
+public class TestSearchTreeBasedDictionaryBuilder {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testBuildVariableWidthDictionaryWithNull() {
+ try (VarCharVector vec = new VarCharVector("", allocator);
+ VarCharVector dictionary = new VarCharVector("", allocator);
+ VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
+
+ vec.allocateNew(100, 10);
+ vec.setValueCount(10);
+
+ dictionary.allocateNew();
+ sortedDictionary.allocateNew();
+
+ // fill data
+ vec.set(0, "hello".getBytes());
+ vec.set(1, "abc".getBytes());
+ vec.setNull(2);
+ vec.set(3, "world".getBytes());
+ vec.set(4, "12".getBytes());
+ vec.set(5, "dictionary".getBytes());
+ vec.setNull(6);
+ vec.set(7, "hello".getBytes());
+ vec.set(8, "good".getBytes());
+ vec.set(9, "abc".getBytes());
+
+ VectorValueComparator<VarCharVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ SearchTreeBasedDictionaryBuilder<VarCharVector> dictionaryBuilder =
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
+
+ int result = dictionaryBuilder.addValues(vec);
+
+ assertEquals(7, result);
+ assertEquals(7, dictionary.getValueCount());
+
+ dictionaryBuilder.populateSortedDictionary(sortedDictionary);
+
+ assertTrue(sortedDictionary.isNull(0));
+ assertEquals("12", new String(sortedDictionary.get(1)));
+ assertEquals("abc", new String(sortedDictionary.get(2)));
+ assertEquals("dictionary", new String(sortedDictionary.get(3)));
+ assertEquals("good", new String(sortedDictionary.get(4)));
+ assertEquals("hello", new String(sortedDictionary.get(5)));
+ assertEquals("world", new String(sortedDictionary.get(6)));
+ }
+ }
+
+ @Test
+ public void testBuildVariableWidthDictionaryWithoutNull() {
+ try (VarCharVector vec = new VarCharVector("", allocator);
+ VarCharVector dictionary = new VarCharVector("", allocator);
+ VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
+
+ vec.allocateNew(100, 10);
+ vec.setValueCount(10);
+
+ dictionary.allocateNew();
+ sortedDictionary.allocateNew();
+
+ // fill data
+ vec.set(0, "hello".getBytes());
+ vec.set(1, "abc".getBytes());
+ vec.setNull(2);
+ vec.set(3, "world".getBytes());
+ vec.set(4, "12".getBytes());
+ vec.set(5, "dictionary".getBytes());
+ vec.setNull(6);
+ vec.set(7, "hello".getBytes());
+ vec.set(8, "good".getBytes());
+ vec.set(9, "abc".getBytes());
+
+ VectorValueComparator<VarCharVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ SearchTreeBasedDictionaryBuilder<VarCharVector> dictionaryBuilder =
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
+
+ int result = dictionaryBuilder.addValues(vec);
+
+ assertEquals(6, result);
+ assertEquals(6, dictionary.getValueCount());
+
+ dictionaryBuilder.populateSortedDictionary(sortedDictionary);
+
+ assertEquals("12", new String(sortedDictionary.get(0)));
+ assertEquals("abc", new String(sortedDictionary.get(1)));
+ assertEquals("dictionary", new String(sortedDictionary.get(2)));
+ assertEquals("good", new String(sortedDictionary.get(3)));
+ assertEquals("hello", new String(sortedDictionary.get(4)));
+ assertEquals("world", new String(sortedDictionary.get(5)));
+ }
+ }
+
+ @Test
+ public void testBuildFixedWidthDictionaryWithNull() {
+ try (IntVector vec = new IntVector("", allocator);
+ IntVector dictionary = new IntVector("", allocator);
+ IntVector sortedDictionary = new IntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ dictionary.allocateNew();
+ sortedDictionary.allocateNew();
+
+ // fill data
+ vec.set(0, 4);
+ vec.set(1, 8);
+ vec.set(2, 32);
+ vec.set(3, 8);
+ vec.set(4, 16);
+ vec.set(5, 32);
+ vec.setNull(6);
+ vec.set(7, 4);
+ vec.set(8, 4);
+ vec.setNull(9);
+
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ SearchTreeBasedDictionaryBuilder<IntVector> dictionaryBuilder =
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
+
+ int result = dictionaryBuilder.addValues(vec);
+
+ assertEquals(5, result);
+ assertEquals(5, dictionary.getValueCount());
+
+ dictionaryBuilder.populateSortedDictionary(sortedDictionary);
+
+ assertTrue(sortedDictionary.isNull(0));
+ assertEquals(4, sortedDictionary.get(1));
+ assertEquals(8, sortedDictionary.get(2));
+ assertEquals(16, sortedDictionary.get(3));
+ assertEquals(32, sortedDictionary.get(4));
+ }
+ }
+
+ @Test
+ public void testBuildFixedWidthDictionaryWithoutNull() {
+ try (IntVector vec = new IntVector("", allocator);
+ IntVector dictionary = new IntVector("", allocator);
+ IntVector sortedDictionary = new IntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ dictionary.allocateNew();
+ sortedDictionary.allocateNew();
+
+ // fill data
+ vec.set(0, 4);
+ vec.set(1, 8);
+ vec.set(2, 32);
+ vec.set(3, 8);
+ vec.set(4, 16);
+ vec.set(5, 32);
+ vec.setNull(6);
+ vec.set(7, 4);
+ vec.set(8, 4);
+ vec.setNull(9);
+
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ SearchTreeBasedDictionaryBuilder<IntVector> dictionaryBuilder =
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
+
+ int result = dictionaryBuilder.addValues(vec);
+
+ assertEquals(4, result);
+ assertEquals(4, dictionary.getValueCount());
+
+ dictionaryBuilder.populateSortedDictionary(sortedDictionary);
+
+ assertEquals(4, sortedDictionary.get(0));
+ assertEquals(8, sortedDictionary.get(1));
+ assertEquals(16, sortedDictionary.get(2));
+ assertEquals(32, sortedDictionary.get(3));
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java
new file mode 100644
index 000000000..4e2d5900f
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.misc;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link PartialSumUtils}.
+ */
+public class TestPartialSumUtils {
+
+ private static final int PARTIAL_SUM_VECTOR_LENGTH = 101;
+
+ private static final int DELTA_VECTOR_LENGTH = 100;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testToPartialSumVector() {
+ try (IntVector delta = new IntVector("delta", allocator);
+ IntVector partialSum = new IntVector("partial sum", allocator)) {
+ delta.allocateNew(DELTA_VECTOR_LENGTH);
+ delta.setValueCount(DELTA_VECTOR_LENGTH);
+
+ partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH);
+
+ // populate delta vector
+ for (int i = 0; i < delta.getValueCount(); i++) {
+ delta.set(i, 3);
+ }
+
+ final long sumBase = 10;
+ PartialSumUtils.toPartialSumVector(delta, partialSum, sumBase);
+
+ // verify results
+ assertEquals(PARTIAL_SUM_VECTOR_LENGTH, partialSum.getValueCount());
+ for (int i = 0; i < partialSum.getValueCount(); i++) {
+ assertEquals(i * 3 + sumBase, partialSum.get(i));
+ }
+ }
+ }
+
+ @Test
+ public void testToDeltaVector() {
+ try (IntVector partialSum = new IntVector("partial sum", allocator);
+ IntVector delta = new IntVector("delta", allocator)) {
+ partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH);
+ partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH);
+
+ delta.allocateNew(DELTA_VECTOR_LENGTH);
+
+ // populate delta vector
+ final int sumBase = 10;
+ for (int i = 0; i < partialSum.getValueCount(); i++) {
+ partialSum.set(i, sumBase + 3 * i);
+ }
+
+ PartialSumUtils.toDeltaVector(partialSum, delta);
+
+ // verify results
+ assertEquals(DELTA_VECTOR_LENGTH, delta.getValueCount());
+ for (int i = 0; i < delta.getValueCount(); i++) {
+ assertEquals(3, delta.get(i));
+ }
+ }
+ }
+
+ @Test
+ public void testFindPositionInPartialSumVector() {
+ try (IntVector partialSum = new IntVector("partial sum", allocator)) {
+ partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH);
+ partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH);
+
+ // populate delta vector
+ final int sumBase = 10;
+ for (int i = 0; i < partialSum.getValueCount(); i++) {
+ partialSum.set(i, sumBase + 3 * i);
+ }
+
+ // search and verify results
+ for (int i = 0; i < PARTIAL_SUM_VECTOR_LENGTH - 1; i++) {
+ assertEquals(i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1));
+ }
+ }
+ }
+
+ @Test
+ public void testFindPositionInPartialSumVectorNegative() {
+ try (IntVector partialSum = new IntVector("partial sum", allocator)) {
+ partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH);
+ partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH);
+
+ // populate delta vector
+ final int sumBase = 10;
+ for (int i = 0; i < partialSum.getValueCount(); i++) {
+ partialSum.set(i, sumBase + 3 * i);
+ }
+
+ // search and verify results
+ assertEquals(0, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase));
+ assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase - 1));
+ assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum,
+ sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1)));
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java
new file mode 100644
index 000000000..f372a809b
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.rank;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}.
+ */
+public class TestVectorRank {
+
+ private BufferAllocator allocator;
+
+ private static final int VECTOR_LENGTH = 10;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testFixedWidthRank() {
+ VectorRank<IntVector> rank = new VectorRank<>(allocator);
+ try (IntVector vector = new IntVector("int vec", allocator)) {
+ vector.allocateNew(VECTOR_LENGTH);
+ vector.setValueCount(VECTOR_LENGTH);
+
+ vector.set(0, 1);
+ vector.set(1, 5);
+ vector.set(2, 3);
+ vector.set(3, 7);
+ vector.set(4, 9);
+ vector.set(5, 8);
+ vector.set(6, 2);
+ vector.set(7, 0);
+ vector.set(8, 4);
+ vector.set(9, 6);
+
+ VectorValueComparator<IntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vector);
+ assertEquals(7, rank.indexAtRank(vector, comparator, 0));
+ assertEquals(0, rank.indexAtRank(vector, comparator, 1));
+ assertEquals(6, rank.indexAtRank(vector, comparator, 2));
+ assertEquals(2, rank.indexAtRank(vector, comparator, 3));
+ assertEquals(8, rank.indexAtRank(vector, comparator, 4));
+ assertEquals(1, rank.indexAtRank(vector, comparator, 5));
+ assertEquals(9, rank.indexAtRank(vector, comparator, 6));
+ assertEquals(3, rank.indexAtRank(vector, comparator, 7));
+ assertEquals(5, rank.indexAtRank(vector, comparator, 8));
+ assertEquals(4, rank.indexAtRank(vector, comparator, 9));
+ }
+ }
+
+ @Test
+ public void testVariableWidthRank() {
+ VectorRank<VarCharVector> rank = new VectorRank<>(allocator);
+ try (VarCharVector vector = new VarCharVector("varchar vec", allocator)) {
+ vector.allocateNew(VECTOR_LENGTH * 5, VECTOR_LENGTH);
+ vector.setValueCount(VECTOR_LENGTH);
+
+ vector.set(0, String.valueOf(1).getBytes());
+ vector.set(1, String.valueOf(5).getBytes());
+ vector.set(2, String.valueOf(3).getBytes());
+ vector.set(3, String.valueOf(7).getBytes());
+ vector.set(4, String.valueOf(9).getBytes());
+ vector.set(5, String.valueOf(8).getBytes());
+ vector.set(6, String.valueOf(2).getBytes());
+ vector.set(7, String.valueOf(0).getBytes());
+ vector.set(8, String.valueOf(4).getBytes());
+ vector.set(9, String.valueOf(6).getBytes());
+
+ VectorValueComparator<VarCharVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vector);
+
+ assertEquals(7, rank.indexAtRank(vector, comparator, 0));
+ assertEquals(0, rank.indexAtRank(vector, comparator, 1));
+ assertEquals(6, rank.indexAtRank(vector, comparator, 2));
+ assertEquals(2, rank.indexAtRank(vector, comparator, 3));
+ assertEquals(8, rank.indexAtRank(vector, comparator, 4));
+ assertEquals(1, rank.indexAtRank(vector, comparator, 5));
+ assertEquals(9, rank.indexAtRank(vector, comparator, 6));
+ assertEquals(3, rank.indexAtRank(vector, comparator, 7));
+ assertEquals(5, rank.indexAtRank(vector, comparator, 8));
+ assertEquals(4, rank.indexAtRank(vector, comparator, 9));
+ }
+ }
+
+ @Test
+ public void testRankNegative() {
+ VectorRank<IntVector> rank = new VectorRank<>(allocator);
+ try (IntVector vector = new IntVector("int vec", allocator)) {
+ vector.allocateNew(VECTOR_LENGTH);
+ vector.setValueCount(VECTOR_LENGTH);
+
+ vector.set(0, 1);
+ vector.set(1, 5);
+ vector.set(2, 3);
+ vector.set(3, 7);
+ vector.set(4, 9);
+ vector.set(5, 8);
+ vector.set(6, 2);
+ vector.set(7, 0);
+ vector.set(8, 4);
+ vector.set(9, 6);
+
+ VectorValueComparator<IntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vector);
+
+ assertThrows(IllegalArgumentException.class, () -> {
+ rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1);
+ });
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
new file mode 100644
index 000000000..767935aaa
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.search;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test cases for {@link ParallelSearcher}.
+ */
+@RunWith(Parameterized.class)
+public class TestParallelSearcher {
+
+ private enum ComparatorType {
+ EqualityComparator,
+ OrderingComparator;
+ }
+
+ private static final int VECTOR_LENGTH = 10000;
+
+ private final int threadCount;
+
+ private BufferAllocator allocator;
+
+ private ExecutorService threadPool;
+
+ private final ComparatorType comparatorType;
+
+ public TestParallelSearcher(ComparatorType comparatorType, int threadCount) {
+ this.comparatorType = comparatorType;
+ this.threadCount = threadCount;
+ }
+
+ @Parameterized.Parameters(name = "comparator type = {0}, thread count = {1}")
+ public static Collection<Object[]> getComparatorName() {
+ List<Object[]> params = new ArrayList<>();
+ int[] threadCounts = {1, 2, 5, 10, 20, 50};
+ for (ComparatorType type : ComparatorType.values()) {
+ for (int count : threadCounts) {
+ params.add(new Object[] {type, count});
+ }
+ }
+ return params;
+ }
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ threadPool = Executors.newFixedThreadPool(threadCount);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ threadPool.shutdown();
+ }
+
+ @Test
+ public void testParallelIntSearch() throws ExecutionException, InterruptedException {
+ try (IntVector targetVector = new IntVector("targetVector", allocator);
+ IntVector keyVector = new IntVector("keyVector", allocator)) {
+ targetVector.allocateNew(VECTOR_LENGTH);
+ keyVector.allocateNew(VECTOR_LENGTH);
+
+ // if we are comparing elements using equality semantics, we do not need a comparator here.
+ VectorValueComparator<IntVector> comparator = comparatorType == ComparatorType.EqualityComparator ? null
+ : DefaultVectorComparators.createDefaultComparator(targetVector);
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ targetVector.set(i, i);
+ keyVector.set(i, i * 2);
+ }
+ targetVector.setValueCount(VECTOR_LENGTH);
+ keyVector.setValueCount(VECTOR_LENGTH);
+
+ ParallelSearcher<IntVector> searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator);
+ if (i * 2 < VECTOR_LENGTH) {
+ assertEquals(i * 2, pos);
+ } else {
+ assertEquals(-1, pos);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testParallelStringSearch() throws ExecutionException, InterruptedException {
+ try (VarCharVector targetVector = new VarCharVector("targetVector", allocator);
+ VarCharVector keyVector = new VarCharVector("keyVector", allocator)) {
+ targetVector.allocateNew(VECTOR_LENGTH);
+ keyVector.allocateNew(VECTOR_LENGTH);
+
+ // if we are comparing elements using equality semantics, we do not need a comparator here.
+ VectorValueComparator<VarCharVector> comparator = comparatorType == ComparatorType.EqualityComparator ? null
+ : DefaultVectorComparators.createDefaultComparator(targetVector);
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ targetVector.setSafe(i, String.valueOf(i).getBytes());
+ keyVector.setSafe(i, String.valueOf(i * 2).getBytes());
+ }
+ targetVector.setValueCount(VECTOR_LENGTH);
+ keyVector.setValueCount(VECTOR_LENGTH);
+
+ ParallelSearcher<VarCharVector> searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator);
+ if (i * 2 < VECTOR_LENGTH) {
+ assertEquals(i * 2, pos);
+ } else {
+ assertEquals(-1, pos);
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
new file mode 100644
index 000000000..d7659dc4c
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.search;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test cases for {@link VectorRangeSearcher}.
+ */
+@RunWith(Parameterized.class)
+public class TestVectorRangeSearcher {
+
+ private BufferAllocator allocator;
+
+ private int repeat;
+
+ public TestVectorRangeSearcher(int repeat) {
+ this.repeat = repeat;
+ }
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testGetLowerBounds() {
+ final int maxValue = 100;
+ try (IntVector intVector = new IntVector("int vec", allocator)) {
+ // allocate vector
+ intVector.allocateNew(maxValue * repeat);
+ intVector.setValueCount(maxValue * repeat);
+
+ // prepare data in sorted order
+ // each value is repeated some times
+ for (int i = 0; i < maxValue; i++) {
+ for (int j = 0; j < repeat; j++) {
+ if (i == 0) {
+ intVector.setNull(i * repeat + j);
+ } else {
+ intVector.set(i * repeat + j, i);
+ }
+ }
+ }
+
+ // do search
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(intVector);
+ for (int i = 0; i < maxValue; i++) {
+ int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat);
+ assertEquals(i * repeat, result);
+ }
+ }
+ }
+
+ @Test
+ public void testGetLowerBoundsNegative() {
+ final int maxValue = 100;
+ try (IntVector intVector = new IntVector("int vec", allocator);
+ IntVector negVector = new IntVector("neg vec", allocator)) {
+ // allocate vector
+ intVector.allocateNew(maxValue * repeat);
+ intVector.setValueCount(maxValue * repeat);
+
+ negVector.allocateNew(maxValue);
+ negVector.setValueCount(maxValue);
+
+ // prepare data in sorted order
+ // each value is repeated some times
+ for (int i = 0; i < maxValue; i++) {
+ for (int j = 0; j < repeat; j++) {
+ if (i == 0) {
+ intVector.setNull(i * repeat + j);
+ } else {
+ intVector.set(i * repeat + j, i);
+ }
+ }
+ negVector.set(i, maxValue + i);
+ }
+
+ // do search
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(intVector);
+ for (int i = 0; i < maxValue; i++) {
+ int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, negVector, i);
+ assertEquals(-1, result);
+ }
+ }
+ }
+
+ @Test
+ public void testGetUpperBounds() {
+ final int maxValue = 100;
+ try (IntVector intVector = new IntVector("int vec", allocator)) {
+ // allocate vector
+ intVector.allocateNew(maxValue * repeat);
+ intVector.setValueCount(maxValue * repeat);
+
+ // prepare data in sorted order
+ // each value is repeated some times
+ for (int i = 0; i < maxValue; i++) {
+ for (int j = 0; j < repeat; j++) {
+ if (i == 0) {
+ intVector.setNull(i * repeat + j);
+ } else {
+ intVector.set(i * repeat + j, i);
+ }
+ }
+ }
+
+ // do search
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(intVector);
+ for (int i = 0; i < maxValue; i++) {
+ int result = VectorRangeSearcher.getLastMatch(intVector, comparator, intVector, i * repeat);
+ assertEquals((i + 1) * repeat - 1, result);
+ }
+ }
+ }
+
+ @Test
+ public void testGetUpperBoundsNegative() {
+ final int maxValue = 100;
+ try (IntVector intVector = new IntVector("int vec", allocator);
+ IntVector negVector = new IntVector("neg vec", allocator)) {
+ // allocate vector
+ intVector.allocateNew(maxValue * repeat);
+ intVector.setValueCount(maxValue * repeat);
+
+ negVector.allocateNew(maxValue);
+ negVector.setValueCount(maxValue);
+
+ // prepare data in sorted order
+ // each value is repeated some times
+ for (int i = 0; i < maxValue; i++) {
+ for (int j = 0; j < repeat; j++) {
+ if (i == 0) {
+ intVector.setNull(i * repeat + j);
+ } else {
+ intVector.set(i * repeat + j, i);
+ }
+ }
+ negVector.set(i, maxValue + i);
+ }
+
+ // do search
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(intVector);
+ for (int i = 0; i < maxValue; i++) {
+ int result = VectorRangeSearcher.getLastMatch(intVector, comparator, negVector, i);
+ assertEquals(-1, result);
+ }
+ }
+ }
+
+ @Parameterized.Parameters(name = "repeat = {0}")
+ public static Collection<Object[]> getRepeat() {
+ return Arrays.asList(
+ new Object[]{1},
+ new Object[]{2},
+ new Object[]{5},
+ new Object[]{10}
+ );
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java
new file mode 100644
index 000000000..2847ddbb8
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java
@@ -0,0 +1,299 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.search;
+
+import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}.
+ */
+public class TestVectorSearcher {
+
+ private final int VECTOR_LENGTH = 100;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testBinarySearchInt() {
+ try (IntVector rawVector = new IntVector("", allocator);
+ IntVector negVector = new IntVector("", allocator)) {
+ rawVector.allocateNew(VECTOR_LENGTH);
+ rawVector.setValueCount(VECTOR_LENGTH);
+ negVector.allocateNew(1);
+ negVector.setValueCount(1);
+
+ // prepare data in sorted order
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i == 0) {
+ rawVector.setNull(i);
+ } else {
+ rawVector.set(i, i);
+ }
+ }
+ negVector.set(0, -333);
+
+ // do search
+ VectorValueComparator<IntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
+ assertEquals(i, result);
+ }
+
+ // negative case
+ assertEquals(-1, VectorSearcher.binarySearch(rawVector, comparator, negVector, 0));
+ }
+ }
+
+ @Test
+ public void testLinearSearchInt() {
+ try (IntVector rawVector = new IntVector("", allocator);
+ IntVector negVector = new IntVector("", allocator)) {
+ rawVector.allocateNew(VECTOR_LENGTH);
+ rawVector.setValueCount(VECTOR_LENGTH);
+ negVector.allocateNew(1);
+ negVector.setValueCount(1);
+
+ // prepare data in sorted order
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i == 0) {
+ rawVector.setNull(i);
+ } else {
+ rawVector.set(i, i);
+ }
+ }
+ negVector.set(0, -333);
+
+ // do search
+ VectorValueComparator<IntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
+ assertEquals(i, result);
+ }
+
+ // negative case
+ assertEquals(-1, VectorSearcher.linearSearch(rawVector, comparator, negVector, 0));
+ }
+ }
+
+ @Test
+ public void testBinarySearchVarChar() {
+ try (VarCharVector rawVector = new VarCharVector("", allocator);
+ VarCharVector negVector = new VarCharVector("", allocator)) {
+ rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH);
+ rawVector.setValueCount(VECTOR_LENGTH);
+ negVector.allocateNew(VECTOR_LENGTH, 1);
+ negVector.setValueCount(1);
+
+ byte[] content = new byte[2];
+
+ // prepare data in sorted order
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i == 0) {
+ rawVector.setNull(i);
+ } else {
+ int q = i / 10;
+ int r = i % 10;
+
+ content[0] = (byte) ('a' + q);
+ content[1] = (byte) r;
+ rawVector.set(i, content);
+ }
+ }
+ negVector.set(0, "abcd".getBytes());
+
+ // do search
+ VectorValueComparator<BaseVariableWidthVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
+ assertEquals(i, result);
+ }
+
+ // negative case
+ assertEquals(-1, VectorSearcher.binarySearch(rawVector, comparator, negVector, 0));
+ }
+ }
+
+ @Test
+ public void testLinearSearchVarChar() {
+ try (VarCharVector rawVector = new VarCharVector("", allocator);
+ VarCharVector negVector = new VarCharVector("", allocator)) {
+ rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH);
+ rawVector.setValueCount(VECTOR_LENGTH);
+ negVector.allocateNew(VECTOR_LENGTH, 1);
+ negVector.setValueCount(1);
+
+ byte[] content = new byte[2];
+
+ // prepare data in sorted order
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i == 0) {
+ rawVector.setNull(i);
+ } else {
+ int q = i / 10;
+ int r = i % 10;
+
+ content[0] = (byte) ('a' + q);
+ content[1] = (byte) r;
+ rawVector.set(i, content);
+ }
+ }
+ negVector.set(0, "abcd".getBytes());
+
+ // do search
+ VectorValueComparator<BaseVariableWidthVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
+ assertEquals(i, result);
+ }
+
+ // negative case
+ assertEquals(-1, VectorSearcher.linearSearch(rawVector, comparator, negVector, 0));
+ }
+ }
+
+ private ListVector createListVector() {
+ final int innerCount = 100;
+ final int outerCount = 10;
+ final int listLength = innerCount / outerCount;
+
+ ListVector listVector = ListVector.empty("list vector", allocator);
+
+ Types.MinorType type = Types.MinorType.INT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ listVector.allocateNew();
+
+ IntVector dataVector = (IntVector) listVector.getDataVector();
+
+ for (int i = 0; i < innerCount; i++) {
+ dataVector.set(i, i);
+ }
+ dataVector.setValueCount(innerCount);
+
+ for (int i = 0; i < outerCount; i++) {
+ BitVectorHelper.setBit(listVector.getValidityBuffer(), i);
+ listVector.getOffsetBuffer().setInt(i * OFFSET_WIDTH, i * listLength);
+ listVector.getOffsetBuffer().setInt((i + 1) * OFFSET_WIDTH, (i + 1) * listLength);
+ }
+ listVector.setLastSet(outerCount - 1);
+ listVector.setValueCount(outerCount);
+
+ return listVector;
+ }
+
+ private ListVector createNegativeListVector() {
+ final int innerCount = 100;
+ final int outerCount = 10;
+ final int listLength = innerCount / outerCount;
+
+ ListVector listVector = ListVector.empty("list vector", allocator);
+
+ Types.MinorType type = Types.MinorType.INT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ listVector.allocateNew();
+
+ IntVector dataVector = (IntVector) listVector.getDataVector();
+
+ for (int i = 0; i < innerCount; i++) {
+ dataVector.set(i, i + 1000);
+ }
+ dataVector.setValueCount(innerCount);
+
+ for (int i = 0; i < outerCount; i++) {
+ BitVectorHelper.setBit(listVector.getValidityBuffer(), i);
+ listVector.getOffsetBuffer().setInt(i * OFFSET_WIDTH, i * listLength);
+ listVector.getOffsetBuffer().setInt((i + 1) * OFFSET_WIDTH, (i + 1) * listLength);
+ }
+ listVector.setValueCount(outerCount);
+
+ return listVector;
+ }
+
+ @Test
+ public void testBinarySearchList() {
+ try (ListVector rawVector = createListVector();
+ ListVector negVector = createNegativeListVector()) {
+
+ // do search
+ VectorValueComparator<ListVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
+ for (int i = 0; i < rawVector.getValueCount(); i++) {
+ int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
+ assertEquals(i, result);
+ }
+
+ // negative case
+ for (int i = 0; i < rawVector.getValueCount(); i++) {
+ int result = VectorSearcher.binarySearch(rawVector, comparator, negVector, i);
+ assertEquals(-1, result);
+ }
+ }
+ }
+
+ @Test
+ public void testLinearSearchList() {
+ try (ListVector rawVector = createListVector();
+ ListVector negVector = createNegativeListVector()) {
+
+ // do search
+ VectorValueComparator<ListVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
+ for (int i = 0; i < rawVector.getValueCount(); i++) {
+ int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
+ assertEquals(i, result);
+ }
+
+ // negative case
+ for (int i = 0; i < rawVector.getValueCount(); i++) {
+ int result = VectorSearcher.linearSearch(rawVector, comparator, negVector, i);
+ assertEquals(-1, result);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java
new file mode 100644
index 000000000..cac9933cc
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Arrays;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link CompositeVectorComparator}.
+ */
+public class TestCompositeVectorComparator {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testCompareVectorSchemaRoot() {
+ final int vectorLength = 10;
+ IntVector intVec1 = new IntVector("int1", allocator);
+ VarCharVector strVec1 = new VarCharVector("str1", allocator);
+
+ IntVector intVec2 = new IntVector("int2", allocator);
+ VarCharVector strVec2 = new VarCharVector("str2", allocator);
+
+ try (VectorSchemaRoot batch1 = new VectorSchemaRoot(Arrays.asList(intVec1, strVec1));
+ VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) {
+
+ intVec1.allocateNew(vectorLength);
+ strVec1.allocateNew(vectorLength * 10, vectorLength);
+ intVec2.allocateNew(vectorLength);
+ strVec2.allocateNew(vectorLength * 10, vectorLength);
+
+ for (int i = 0; i < vectorLength; i++) {
+ intVec1.set(i, i);
+ strVec1.set(i, new String("a" + i).getBytes());
+ intVec2.set(i, i);
+ strVec2.set(i, new String("a5").getBytes());
+ }
+
+ VectorValueComparator<IntVector> innerComparator1 =
+ DefaultVectorComparators.createDefaultComparator(intVec1);
+ innerComparator1.attachVectors(intVec1, intVec2);
+ VectorValueComparator<VarCharVector> innerComparator2 =
+ DefaultVectorComparators.createDefaultComparator(strVec1);
+ innerComparator2.attachVectors(strVec1, strVec2);
+
+ VectorValueComparator<ValueVector> comparator = new CompositeVectorComparator(
+ new VectorValueComparator[]{innerComparator1, innerComparator2}
+ );
+
+ // verify results
+
+ // both elements are equal, the result is equal
+ assertTrue(comparator.compare(5, 5) == 0);
+
+ // the first element being equal, the second is smaller, and the result is smaller
+ assertTrue(comparator.compare(1, 1) < 0);
+ assertTrue(comparator.compare(2, 2) < 0);
+ assertTrue(comparator.compare(3, 3) < 0);
+
+ // the first element being equal, the second is larger, and the result is larger
+ assertTrue(comparator.compare(7, 7) > 0);
+ assertTrue(comparator.compare(8, 8) > 0);
+ assertTrue(comparator.compare(9, 9) > 0);
+
+ // the first element is smaller, the result is always smaller
+ assertTrue(comparator.compare(1, 2) < 0);
+ assertTrue(comparator.compare(3, 7) < 0);
+ assertTrue(comparator.compare(4, 9) < 0);
+
+ // the first element is larger, the result is always larger
+ assertTrue(comparator.compare(2, 0) > 0);
+ assertTrue(comparator.compare(8, 7) > 0);
+ assertTrue(comparator.compare(4, 1) > 0);
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java
new file mode 100644
index 000000000..2fbf598bf
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java
@@ -0,0 +1,393 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH;
+import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link DefaultVectorComparators}.
+ */
+public class TestDefaultVectorComparator {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ private ListVector createListVector(int count) {
+ ListVector listVector = ListVector.empty("list vector", allocator);
+ Types.MinorType type = Types.MinorType.INT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+ listVector.allocateNew();
+
+ IntVector dataVector = (IntVector) listVector.getDataVector();
+
+ for (int i = 0; i < count; i++) {
+ dataVector.set(i, i);
+ }
+ dataVector.setValueCount(count);
+
+ listVector.setNotNull(0);
+
+ listVector.getOffsetBuffer().setInt(0, 0);
+ listVector.getOffsetBuffer().setInt(OFFSET_WIDTH, count);
+
+ listVector.setLastSet(0);
+ listVector.setValueCount(1);
+
+ return listVector;
+ }
+
+ @Test
+ public void testCompareLists() {
+ try (ListVector listVector1 = createListVector(10);
+ ListVector listVector2 = createListVector(11)) {
+ VectorValueComparator<ListVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(listVector1);
+ comparator.attachVectors(listVector1, listVector2);
+
+ // prefix is smaller
+ assertTrue(comparator.compare(0, 0) < 0);
+ }
+
+ try (ListVector listVector1 = createListVector(11);
+ ListVector listVector2 = createListVector(11)) {
+ ((IntVector) listVector2.getDataVector()).set(10, 110);
+
+ VectorValueComparator<ListVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(listVector1);
+ comparator.attachVectors(listVector1, listVector2);
+
+ // breaking tie by the last element
+ assertTrue(comparator.compare(0, 0) < 0);
+ }
+
+ try (ListVector listVector1 = createListVector(10);
+ ListVector listVector2 = createListVector(10)) {
+
+ VectorValueComparator<ListVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(listVector1);
+ comparator.attachVectors(listVector1, listVector2);
+
+ // list vector elements equal
+ assertTrue(comparator.compare(0, 0) == 0);
+ }
+ }
+
+ @Test
+ public void testCopiedComparatorForLists() {
+ for (int i = 1; i < 10; i++) {
+ for (int j = 1; j < 10; j++) {
+ try (ListVector listVector1 = createListVector(10);
+ ListVector listVector2 = createListVector(11)) {
+ VectorValueComparator<ListVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(listVector1);
+ comparator.attachVectors(listVector1, listVector2);
+
+ VectorValueComparator<ListVector> copyComparator = comparator.createNew();
+ copyComparator.attachVectors(listVector1, listVector2);
+
+ assertEquals(comparator.compare(0, 0), copyComparator.compare(0, 0));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testCompareUInt1() {
+ try (UInt1Vector vec = new UInt1Vector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ vec.setNull(0);
+ vec.set(1, -2);
+ vec.set(2, -1);
+ vec.set(3, 0);
+ vec.set(4, 1);
+ vec.set(5, 2);
+ vec.set(6, -2);
+ vec.setNull(7);
+ vec.set(8, Byte.MAX_VALUE);
+ vec.set(9, Byte.MIN_VALUE);
+
+ VectorValueComparator<UInt1Vector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
+ comparator.attachVector(vec);
+
+ assertTrue(comparator.compare(0, 1) < 0);
+ assertTrue(comparator.compare(1, 2) < 0);
+ assertTrue(comparator.compare(1, 3) > 0);
+ assertTrue(comparator.compare(2, 5) > 0);
+ assertTrue(comparator.compare(4, 5) < 0);
+ assertTrue(comparator.compare(1, 6) == 0);
+ assertTrue(comparator.compare(0, 7) == 0);
+ assertTrue(comparator.compare(8, 9) < 0);
+ assertTrue(comparator.compare(4, 8) < 0);
+ assertTrue(comparator.compare(5, 9) < 0);
+ assertTrue(comparator.compare(2, 9) > 0);
+ }
+ }
+
+ @Test
+ public void testCompareUInt2() {
+ try (UInt2Vector vec = new UInt2Vector("", allocator)) {
+ vec.allocateNew(10);
+
+ ValueVectorDataPopulator.setVector(
+ vec, null, (char) -2, (char) -1, (char) 0, (char) 1, (char) 2, (char) -2, null,
+ '\u7FFF', // value for the max 16-byte signed integer
+ '\u8000' // value for the min 16-byte signed integer
+ );
+
+ VectorValueComparator<UInt2Vector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
+ comparator.attachVector(vec);
+
+ assertTrue(comparator.compare(0, 1) < 0);
+ assertTrue(comparator.compare(1, 2) < 0);
+ assertTrue(comparator.compare(1, 3) > 0);
+ assertTrue(comparator.compare(2, 5) > 0);
+ assertTrue(comparator.compare(4, 5) < 0);
+ assertTrue(comparator.compare(1, 6) == 0);
+ assertTrue(comparator.compare(0, 7) == 0);
+ assertTrue(comparator.compare(8, 9) < 0);
+ assertTrue(comparator.compare(4, 8) < 0);
+ assertTrue(comparator.compare(5, 9) < 0);
+ assertTrue(comparator.compare(2, 9) > 0);
+ }
+ }
+
+ @Test
+ public void testCompareUInt4() {
+ try (UInt4Vector vec = new UInt4Vector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ vec.setNull(0);
+ vec.set(1, -2);
+ vec.set(2, -1);
+ vec.set(3, 0);
+ vec.set(4, 1);
+ vec.set(5, 2);
+ vec.set(6, -2);
+ vec.setNull(7);
+ vec.set(8, Integer.MAX_VALUE);
+ vec.set(9, Integer.MIN_VALUE);
+
+ VectorValueComparator<UInt4Vector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
+ comparator.attachVector(vec);
+
+ assertTrue(comparator.compare(0, 1) < 0);
+ assertTrue(comparator.compare(1, 2) < 0);
+ assertTrue(comparator.compare(1, 3) > 0);
+ assertTrue(comparator.compare(2, 5) > 0);
+ assertTrue(comparator.compare(4, 5) < 0);
+ assertTrue(comparator.compare(1, 6) == 0);
+ assertTrue(comparator.compare(0, 7) == 0);
+ assertTrue(comparator.compare(8, 9) < 0);
+ assertTrue(comparator.compare(4, 8) < 0);
+ assertTrue(comparator.compare(5, 9) < 0);
+ assertTrue(comparator.compare(2, 9) > 0);
+ }
+ }
+
+ @Test
+ public void testCompareUInt8() {
+ try (UInt8Vector vec = new UInt8Vector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ vec.setNull(0);
+ vec.set(1, -2);
+ vec.set(2, -1);
+ vec.set(3, 0);
+ vec.set(4, 1);
+ vec.set(5, 2);
+ vec.set(6, -2);
+ vec.setNull(7);
+ vec.set(8, Long.MAX_VALUE);
+ vec.set(9, Long.MIN_VALUE);
+
+ VectorValueComparator<UInt8Vector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
+ comparator.attachVector(vec);
+
+ assertTrue(comparator.compare(0, 1) < 0);
+ assertTrue(comparator.compare(1, 2) < 0);
+ assertTrue(comparator.compare(1, 3) > 0);
+ assertTrue(comparator.compare(2, 5) > 0);
+ assertTrue(comparator.compare(4, 5) < 0);
+ assertTrue(comparator.compare(1, 6) == 0);
+ assertTrue(comparator.compare(0, 7) == 0);
+ assertTrue(comparator.compare(8, 9) < 0);
+ assertTrue(comparator.compare(4, 8) < 0);
+ assertTrue(comparator.compare(5, 9) < 0);
+ assertTrue(comparator.compare(2, 9) > 0);
+ }
+ }
+
+ @Test
+ public void testCompareLong() {
+ try (BigIntVector vec = new BigIntVector("", allocator)) {
+ vec.allocateNew(8);
+ ValueVectorDataPopulator.setVector(
+ vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE);
+
+ VectorValueComparator<BigIntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
+ comparator.attachVector(vec);
+
+ assertTrue(comparator.compare(0, 1) < 0);
+ assertTrue(comparator.compare(0, 2) < 0);
+ assertTrue(comparator.compare(2, 1) > 0);
+
+ // test equality
+ assertTrue(comparator.compare(5, 5) == 0);
+ assertTrue(comparator.compare(2, 4) == 0);
+
+ // null first
+ assertTrue(comparator.compare(3, 4) < 0);
+ assertTrue(comparator.compare(5, 3) > 0);
+
+ // potential overflow
+ assertTrue(comparator.compare(6, 7) < 0);
+ assertTrue(comparator.compare(7, 6) > 0);
+ assertTrue(comparator.compare(7, 7) == 0);
+ }
+ }
+
+ @Test
+ public void testCompareInt() {
+ try (IntVector vec = new IntVector("", allocator)) {
+ vec.allocateNew(8);
+ ValueVectorDataPopulator.setVector(
+ vec, -1, 0, 1, null, 1, 5, Integer.MIN_VALUE + 1, Integer.MAX_VALUE);
+
+ VectorValueComparator<IntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
+ comparator.attachVector(vec);
+
+ assertTrue(comparator.compare(0, 1) < 0);
+ assertTrue(comparator.compare(0, 2) < 0);
+ assertTrue(comparator.compare(2, 1) > 0);
+
+ // test equality
+ assertTrue(comparator.compare(5, 5) == 0);
+ assertTrue(comparator.compare(2, 4) == 0);
+
+ // null first
+ assertTrue(comparator.compare(3, 4) < 0);
+ assertTrue(comparator.compare(5, 3) > 0);
+
+ // potential overflow
+ assertTrue(comparator.compare(6, 7) < 0);
+ assertTrue(comparator.compare(7, 6) > 0);
+ assertTrue(comparator.compare(7, 7) == 0);
+ }
+ }
+
+ @Test
+ public void testCompareShort() {
+ try (SmallIntVector vec = new SmallIntVector("", allocator)) {
+ vec.allocateNew(8);
+ ValueVectorDataPopulator.setVector(
+ vec, (short) -1, (short) 0, (short) 1, null, (short) 1, (short) 5,
+ (short) (Short.MIN_VALUE + 1), Short.MAX_VALUE);
+
+ VectorValueComparator<SmallIntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
+ comparator.attachVector(vec);
+
+ assertTrue(comparator.compare(0, 1) < 0);
+ assertTrue(comparator.compare(0, 2) < 0);
+ assertTrue(comparator.compare(2, 1) > 0);
+
+ // test equality
+ assertTrue(comparator.compare(5, 5) == 0);
+ assertTrue(comparator.compare(2, 4) == 0);
+
+ // null first
+ assertTrue(comparator.compare(3, 4) < 0);
+ assertTrue(comparator.compare(5, 3) > 0);
+
+ // potential overflow
+ assertTrue(comparator.compare(6, 7) < 0);
+ assertTrue(comparator.compare(7, 6) > 0);
+ assertTrue(comparator.compare(7, 7) == 0);
+ }
+ }
+
+ @Test
+ public void testCompareByte() {
+ try (TinyIntVector vec = new TinyIntVector("", allocator)) {
+ vec.allocateNew(8);
+ ValueVectorDataPopulator.setVector(
+ vec, (byte) -1, (byte) 0, (byte) 1, null, (byte) 1, (byte) 5,
+ (byte) (Byte.MIN_VALUE + 1), Byte.MAX_VALUE);
+
+ VectorValueComparator<TinyIntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
+ comparator.attachVector(vec);
+
+ assertTrue(comparator.compare(0, 1) < 0);
+ assertTrue(comparator.compare(0, 2) < 0);
+ assertTrue(comparator.compare(2, 1) > 0);
+
+ // test equality
+ assertTrue(comparator.compare(5, 5) == 0);
+ assertTrue(comparator.compare(2, 4) == 0);
+
+ // null first
+ assertTrue(comparator.compare(3, 4) < 0);
+ assertTrue(comparator.compare(5, 3) > 0);
+
+ // potential overflow
+ assertTrue(comparator.compare(6, 7) < 0);
+ assertTrue(comparator.compare(7, 6) > 0);
+ assertTrue(comparator.compare(7, 7) == 0);
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
new file mode 100644
index 000000000..91ef52017
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+
+import java.util.stream.IntStream;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link FixedWidthInPlaceVectorSorter}.
+ */
+public class TestFixedWidthInPlaceVectorSorter {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testSortInt() {
+ try (IntVector vec = new IntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, 10);
+ vec.set(1, 8);
+ vec.setNull(2);
+ vec.set(3, 10);
+ vec.set(4, 12);
+ vec.set(5, 17);
+ vec.setNull(6);
+ vec.set(7, 23);
+ vec.set(8, 35);
+ vec.set(9, 2);
+
+ // sort the vector
+ FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter();
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ sorter.sortInPlace(vec, comparator);
+
+ // verify results
+ Assert.assertEquals(10, vec.getValueCount());
+
+ assertTrue(vec.isNull(0));
+ assertTrue(vec.isNull(1));
+ Assert.assertEquals(2, vec.get(2));
+ Assert.assertEquals(8, vec.get(3));
+ Assert.assertEquals(10, vec.get(4));
+ Assert.assertEquals(10, vec.get(5));
+ Assert.assertEquals(12, vec.get(6));
+ Assert.assertEquals(17, vec.get(7));
+ Assert.assertEquals(23, vec.get(8));
+ Assert.assertEquals(35, vec.get(9));
+ }
+ }
+
+ /**
+ * Tests the worst case for quick sort.
+ * It may cause stack overflow if the algorithm is implemented as a recursive algorithm.
+ */
+ @Test
+ public void testSortLargeIncreasingInt() {
+ final int vectorLength = 20000;
+ try (IntVector vec = new IntVector("", allocator)) {
+ vec.allocateNew(vectorLength);
+ vec.setValueCount(vectorLength);
+
+ // fill data to sort
+ for (int i = 0; i < vectorLength; i++) {
+ vec.set(i, i);
+ }
+
+ // sort the vector
+ FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter();
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ sorter.sortInPlace(vec, comparator);
+
+ // verify results
+ Assert.assertEquals(vectorLength, vec.getValueCount());
+
+ for (int i = 0; i < vectorLength; i++) {
+ Assert.assertEquals(i, vec.get(i));
+ }
+ }
+ }
+
+ @Test
+ public void testChoosePivot() {
+ final int vectorLength = 100;
+ try (IntVector vec = new IntVector("", allocator)) {
+ vec.allocateNew(vectorLength);
+
+ // the vector is sorted, so the pivot should be in the middle
+ for (int i = 0; i < vectorLength; i++) {
+ vec.set(i, i * 100);
+ }
+ vec.setValueCount(vectorLength);
+
+ FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter();
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) {
+ // setup internal data structures
+ pivotBuffer.allocateNew(1);
+ sorter.pivotBuffer = pivotBuffer;
+ sorter.comparator = comparator;
+ sorter.vec = vec;
+ comparator.attachVectors(vec, pivotBuffer);
+
+ int low = 5;
+ int high = 6;
+ int pivotValue = vec.get(low);
+ assertTrue(high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD);
+
+ // the range is small enough, so the pivot is simply selected as the low value
+ sorter.choosePivot(low, high);
+ assertEquals(pivotValue, vec.get(low));
+
+ low = 30;
+ high = 80;
+ pivotValue = vec.get((low + high) / 2);
+ assertTrue(high - low + 1 >= FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD);
+
+ // the range is large enough, so the median is selected as the pivot
+ sorter.choosePivot(low, high);
+ assertEquals(pivotValue, vec.get(low));
+ }
+ }
+ }
+
+ /**
+ * Evaluates choosing pivot for all possible permutations of 3 numbers.
+ */
+ @Test
+ public void testChoosePivotAllPermutes() {
+ try (IntVector vec = new IntVector("", allocator)) {
+ vec.allocateNew(3);
+
+ FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter();
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) {
+ // setup internal data structures
+ pivotBuffer.allocateNew(1);
+ sorter.pivotBuffer = pivotBuffer;
+ sorter.comparator = comparator;
+ sorter.vec = vec;
+ comparator.attachVectors(vec, pivotBuffer);
+
+ int low = 0;
+ int high = 2;
+
+ ValueVectorDataPopulator.setVector(vec, 11, 22, 33);
+ sorter.choosePivot(low, high);
+ assertEquals(22, vec.get(0));
+
+ ValueVectorDataPopulator.setVector(vec, 11, 33, 22);
+ sorter.choosePivot(low, high);
+ assertEquals(22, vec.get(0));
+
+ ValueVectorDataPopulator.setVector(vec, 22, 11, 33);
+ sorter.choosePivot(low, high);
+ assertEquals(22, vec.get(0));
+
+ ValueVectorDataPopulator.setVector(vec, 22, 33, 11);
+ sorter.choosePivot(low, high);
+ assertEquals(22, vec.get(0));
+
+ ValueVectorDataPopulator.setVector(vec, 33, 11, 22);
+ sorter.choosePivot(low, high);
+ assertEquals(22, vec.get(0));
+
+ ValueVectorDataPopulator.setVector(vec, 33, 22, 11);
+ sorter.choosePivot(low, high);
+ assertEquals(22, vec.get(0));
+ }
+ }
+ }
+
+ @Test
+ public void testSortInt2() {
+ try (IntVector vector = new IntVector("vector", allocator)) {
+ ValueVectorDataPopulator.setVector(vector,
+ 0, 1, 2, 3, 4, 5, 30, 31, 32, 33,
+ 34, 35, 60, 61, 62, 63, 64, 65, 6, 7,
+ 8, 9, 10, 11, 36, 37, 38, 39, 40, 41,
+ 66, 67, 68, 69, 70, 71);
+
+ FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter();
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vector);
+
+ sorter.sortInPlace(vector, comparator);
+
+ int[] actual = new int[vector.getValueCount()];
+ IntStream.range(0, vector.getValueCount()).forEach(
+ i -> actual[i] = vector.get(i));
+
+ assertArrayEquals(
+ new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual);
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
new file mode 100644
index 000000000..e3701f1d8
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+
+import java.util.stream.IntStream;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link FixedWidthOutOfPlaceVectorSorter}.
+ */
+public class TestFixedWidthOutOfPlaceVectorSorter {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testSortByte() {
+ try (TinyIntVector vec = new TinyIntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, 10);
+ vec.set(1, 8);
+ vec.setNull(2);
+ vec.set(3, 10);
+ vec.set(4, 12);
+ vec.set(5, 17);
+ vec.setNull(6);
+ vec.set(7, 23);
+ vec.set(8, 35);
+ vec.set(9, 2);
+
+ // sort the vector
+ FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<TinyIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ TinyIntVector sortedVec =
+ (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+ sortedVec.allocateNew(vec.getValueCount());
+ sortedVec.setValueCount(vec.getValueCount());
+
+ sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+ // verify results
+ Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+ assertTrue(sortedVec.isNull(0));
+ assertTrue(sortedVec.isNull(1));
+ Assert.assertEquals((byte) 2, sortedVec.get(2));
+ Assert.assertEquals((byte) 8, sortedVec.get(3));
+ Assert.assertEquals((byte) 10, sortedVec.get(4));
+ Assert.assertEquals((byte) 10, sortedVec.get(5));
+ Assert.assertEquals((byte) 12, sortedVec.get(6));
+ Assert.assertEquals((byte) 17, sortedVec.get(7));
+ Assert.assertEquals((byte) 23, sortedVec.get(8));
+ Assert.assertEquals((byte) 35, sortedVec.get(9));
+
+ sortedVec.close();
+ }
+ }
+
+ @Test
+ public void testSortShort() {
+ try (SmallIntVector vec = new SmallIntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, 10);
+ vec.set(1, 8);
+ vec.setNull(2);
+ vec.set(3, 10);
+ vec.set(4, 12);
+ vec.set(5, 17);
+ vec.setNull(6);
+ vec.set(7, 23);
+ vec.set(8, 35);
+ vec.set(9, 2);
+
+ // sort the vector
+ FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<SmallIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ SmallIntVector sortedVec =
+ (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+ sortedVec.allocateNew(vec.getValueCount());
+ sortedVec.setValueCount(vec.getValueCount());
+
+ sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+ // verify results
+ Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+ assertTrue(sortedVec.isNull(0));
+ assertTrue(sortedVec.isNull(1));
+ Assert.assertEquals((short) 2, sortedVec.get(2));
+ Assert.assertEquals((short) 8, sortedVec.get(3));
+ Assert.assertEquals((short) 10, sortedVec.get(4));
+ Assert.assertEquals((short) 10, sortedVec.get(5));
+ Assert.assertEquals((short) 12, sortedVec.get(6));
+ Assert.assertEquals((short) 17, sortedVec.get(7));
+ Assert.assertEquals((short) 23, sortedVec.get(8));
+ Assert.assertEquals((short) 35, sortedVec.get(9));
+
+ sortedVec.close();
+ }
+ }
+
+ @Test
+ public void testSortInt() {
+ try (IntVector vec = new IntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, 10);
+ vec.set(1, 8);
+ vec.setNull(2);
+ vec.set(3, 10);
+ vec.set(4, 12);
+ vec.set(5, 17);
+ vec.setNull(6);
+ vec.set(7, 23);
+ vec.set(8, 35);
+ vec.set(9, 2);
+
+ // sort the vector
+ FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+ sortedVec.allocateNew(vec.getValueCount());
+ sortedVec.setValueCount(vec.getValueCount());
+
+ sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+ // verify results
+ Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+ assertTrue(sortedVec.isNull(0));
+ assertTrue(sortedVec.isNull(1));
+ Assert.assertEquals(2, sortedVec.get(2));
+ Assert.assertEquals(8, sortedVec.get(3));
+ Assert.assertEquals(10, sortedVec.get(4));
+ Assert.assertEquals(10, sortedVec.get(5));
+ Assert.assertEquals(12, sortedVec.get(6));
+ Assert.assertEquals(17, sortedVec.get(7));
+ Assert.assertEquals(23, sortedVec.get(8));
+ Assert.assertEquals(35, sortedVec.get(9));
+
+ sortedVec.close();
+ }
+ }
+
+ @Test
+ public void testSortLong() {
+ try (BigIntVector vec = new BigIntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, 10L);
+ vec.set(1, 8L);
+ vec.setNull(2);
+ vec.set(3, 10L);
+ vec.set(4, 12L);
+ vec.set(5, 17L);
+ vec.setNull(6);
+ vec.set(7, 23L);
+ vec.set(8, 1L << 35L);
+ vec.set(9, 2L);
+
+ // sort the vector
+ FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<BigIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+ sortedVec.allocateNew(vec.getValueCount());
+ sortedVec.setValueCount(vec.getValueCount());
+
+ sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+ // verify results
+ Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+ assertTrue(sortedVec.isNull(0));
+ assertTrue(sortedVec.isNull(1));
+ Assert.assertEquals(2L, sortedVec.get(2));
+ Assert.assertEquals(8L, sortedVec.get(3));
+ Assert.assertEquals(10L, sortedVec.get(4));
+ Assert.assertEquals(10L, sortedVec.get(5));
+ Assert.assertEquals(12L, sortedVec.get(6));
+ Assert.assertEquals(17L, sortedVec.get(7));
+ Assert.assertEquals(23L, sortedVec.get(8));
+ Assert.assertEquals(1L << 35L, sortedVec.get(9));
+
+ sortedVec.close();
+ }
+ }
+
+ @Test
+ public void testSortFloat() {
+ try (Float4Vector vec = new Float4Vector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, 10f);
+ vec.set(1, 8f);
+ vec.setNull(2);
+ vec.set(3, 10f);
+ vec.set(4, 12f);
+ vec.set(5, 17f);
+ vec.setNull(6);
+ vec.set(7, 23f);
+ vec.set(8, Float.NaN);
+ vec.set(9, 2f);
+
+ // sort the vector
+ FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<Float4Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+ sortedVec.allocateNew(vec.getValueCount());
+ sortedVec.setValueCount(vec.getValueCount());
+
+ sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+ // verify results
+ Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+ assertTrue(sortedVec.isNull(0));
+ assertTrue(sortedVec.isNull(1));
+ Assert.assertEquals(2f, sortedVec.get(2), 0f);
+ Assert.assertEquals(8f, sortedVec.get(3), 0f);
+ Assert.assertEquals(10f, sortedVec.get(4), 0f);
+ Assert.assertEquals(10f, sortedVec.get(5), 0f);
+ Assert.assertEquals(12f, sortedVec.get(6), 0f);
+ Assert.assertEquals(17f, sortedVec.get(7), 0f);
+ Assert.assertEquals(23f, sortedVec.get(8), 0f);
+ Assert.assertEquals(Float.NaN, sortedVec.get(9), 0f);
+
+ sortedVec.close();
+ }
+ }
+
+ @Test
+ public void testSortDouble() {
+ try (Float8Vector vec = new Float8Vector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, 10);
+ vec.set(1, 8);
+ vec.setNull(2);
+ vec.set(3, 10);
+ vec.set(4, 12);
+ vec.set(5, 17);
+ vec.setNull(6);
+ vec.set(7, Double.NaN);
+ vec.set(8, 35);
+ vec.set(9, 2);
+
+ // sort the vector
+ FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<Float8Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ Float8Vector sortedVec = (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+ sortedVec.allocateNew(vec.getValueCount());
+ sortedVec.setValueCount(vec.getValueCount());
+
+ sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+ // verify results
+ Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+ assertTrue(sortedVec.isNull(0));
+ assertTrue(sortedVec.isNull(1));
+ Assert.assertEquals(2, sortedVec.get(2), 0);
+ Assert.assertEquals(8, sortedVec.get(3), 0);
+ Assert.assertEquals(10, sortedVec.get(4), 0);
+ Assert.assertEquals(10, sortedVec.get(5), 0);
+ Assert.assertEquals(12, sortedVec.get(6), 0);
+ Assert.assertEquals(17, sortedVec.get(7), 0);
+ Assert.assertEquals(35, sortedVec.get(8), 0);
+ Assert.assertEquals(Double.NaN, sortedVec.get(9), 0);
+
+ sortedVec.close();
+ }
+ }
+
+ @Test
+ public void testSortInt2() {
+ try (IntVector vec = new IntVector("", allocator)) {
+ ValueVectorDataPopulator.setVector(vec,
+ 0, 1, 2, 3, 4, 5, 30, 31, 32, 33,
+ 34, 35, 60, 61, 62, 63, 64, 65, 6, 7,
+ 8, 9, 10, 11, 36, 37, 38, 39, 40, 41,
+ 66, 67, 68, 69, 70, 71);
+
+ // sort the vector
+ FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ try (IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) {
+ sortedVec.allocateNew(vec.getValueCount());
+ sortedVec.setValueCount(vec.getValueCount());
+
+ sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+ // verify results
+ int[] actual = new int[sortedVec.getValueCount()];
+ IntStream.range(0, sortedVec.getValueCount()).forEach(
+ i -> actual[i] = sortedVec.get(i));
+
+ assertArrayEquals(
+ new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java
new file mode 100644
index 000000000..ba2a341bf
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.function.Function;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test sorting fixed width vectors with random data.
+ */
+@RunWith(Parameterized.class)
+public class TestFixedWidthSorting<V extends BaseFixedWidthVector, U extends Comparable<U>> {
+
+ static final int[] VECTOR_LENGTHS = new int[] {2, 5, 10, 50, 100, 1000, 3000};
+
+ static final double[] NULL_FRACTIONS = {0, 0.1, 0.3, 0.5, 0.7, 0.9, 1};
+
+ private final int length;
+
+ private final double nullFraction;
+
+ private final boolean inPlace;
+
+ private final Function<BufferAllocator, V> vectorGenerator;
+
+ private final TestSortingUtil.DataGenerator<V, U> dataGenerator;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ public TestFixedWidthSorting(
+ int length, double nullFraction, boolean inPlace, String desc,
+ Function<BufferAllocator, V> vectorGenerator, TestSortingUtil.DataGenerator<V, U> dataGenerator) {
+ this.length = length;
+ this.nullFraction = nullFraction;
+ this.inPlace = inPlace;
+ this.vectorGenerator = vectorGenerator;
+ this.dataGenerator = dataGenerator;
+ }
+
+ @Test
+ public void testSort() {
+ if (inPlace) {
+ sortInPlace();
+ } else {
+ sortOutOfPlace();
+ }
+ }
+
+ void sortInPlace() {
+ try (V vector = vectorGenerator.apply(allocator)) {
+ U[] array = dataGenerator.populate(vector, length, nullFraction);
+ TestSortingUtil.sortArray(array);
+
+ FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter();
+ VectorValueComparator<V> comparator = DefaultVectorComparators.createDefaultComparator(vector);
+
+ sorter.sortInPlace(vector, comparator);
+
+ TestSortingUtil.verifyResults(vector, array);
+ }
+ }
+
+ void sortOutOfPlace() {
+ try (V vector = vectorGenerator.apply(allocator)) {
+ U[] array = dataGenerator.populate(vector, length, nullFraction);
+ TestSortingUtil.sortArray(array);
+
+ // sort the vector
+ FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<V> comparator = DefaultVectorComparators.createDefaultComparator(vector);
+
+ try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) {
+ sortedVec.allocateNew(vector.getValueCount());
+ sortedVec.setValueCount(vector.getValueCount());
+
+ sorter.sortOutOfPlace(vector, sortedVec, comparator);
+
+ // verify results
+ TestSortingUtil.verifyResults(sortedVec, array);
+ }
+ }
+ }
+
+ @Parameterized.Parameters(name = "length = {0}, null fraction = {1}, in place = {2}, vector = {3}")
+ public static Collection<Object[]> getParameters() {
+ List<Object[]> params = new ArrayList<>();
+ for (int length : VECTOR_LENGTHS) {
+ for (double nullFrac : NULL_FRACTIONS) {
+ for (boolean inPlace : new boolean[] {true, false}) {
+ params.add(new Object[] {
+ length, nullFrac, inPlace, "TinyIntVector",
+ (Function<BufferAllocator, TinyIntVector>) (allocator -> new TinyIntVector("vector", allocator)),
+ TestSortingUtil.TINY_INT_GENERATOR
+ });
+
+ params.add(new Object[] {
+ length, nullFrac, inPlace, "SmallIntVector",
+ (Function<BufferAllocator, SmallIntVector>) (allocator -> new SmallIntVector("vector", allocator)),
+ TestSortingUtil.SMALL_INT_GENERATOR
+ });
+
+ params.add(new Object[] {
+ length, nullFrac, inPlace, "IntVector",
+ (Function<BufferAllocator, IntVector>) (allocator -> new IntVector("vector", allocator)),
+ TestSortingUtil.INT_GENERATOR
+ });
+
+ params.add(new Object[] {
+ length, nullFrac, inPlace, "BigIntVector",
+ (Function<BufferAllocator, BigIntVector>) (allocator -> new BigIntVector("vector", allocator)),
+ TestSortingUtil.LONG_GENERATOR
+ });
+
+ params.add(new Object[] {
+ length, nullFrac, inPlace, "Float4Vector",
+ (Function<BufferAllocator, Float4Vector>) (allocator -> new Float4Vector("vector", allocator)),
+ TestSortingUtil.FLOAT_GENERATOR
+ });
+
+ params.add(new Object[] {
+ length, nullFrac, inPlace, "Float8Vector",
+ (Function<BufferAllocator, Float8Vector>) (allocator -> new Float8Vector("vector", allocator)),
+ TestSortingUtil.DOUBLE_GENERATOR
+ });
+ }
+ }
+ }
+ return params;
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java
new file mode 100644
index 000000000..99e22f8bd
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link IndexSorter}.
+ */
+public class TestIndexSorter {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testIndexSort() {
+ try (IntVector vec = new IntVector("", allocator)) {
+ vec.allocateNew(10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ ValueVectorDataPopulator.setVector(vec, 11, 8, 33, 10, 12, 17, null, 23, 35, 2);
+
+ // sort the index
+ IndexSorter<IntVector> indexSorter = new IndexSorter<>();
+ DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator();
+ intComparator.attachVector(vec);
+
+ IntVector indices = new IntVector("", allocator);
+ indices.setValueCount(10);
+ indexSorter.sort(vec, indices, intComparator);
+
+ int[] expected = new int[]{6, 9, 1, 3, 0, 4, 5, 7, 2, 8};
+
+ for (int i = 0; i < expected.length; i++) {
+ assertTrue(!indices.isNull(i));
+ assertEquals(expected[i], indices.get(i));
+ }
+ indices.close();
+ }
+ }
+
+ /**
+ * Tests the worst case for quick sort.
+ * It may cause stack overflow if the algorithm is implemented as a recursive algorithm.
+ */
+ @Test
+ public void testSortLargeIncreasingInt() {
+ final int vectorLength = 20000;
+ try (IntVector vec = new IntVector("", allocator)) {
+ vec.allocateNew(vectorLength);
+ vec.setValueCount(vectorLength);
+
+ // fill data to sort
+ for (int i = 0; i < vectorLength; i++) {
+ vec.set(i, i);
+ }
+
+ // sort the vector
+ IndexSorter<IntVector> indexSorter = new IndexSorter<>();
+ DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator();
+ intComparator.attachVector(vec);
+
+ try (IntVector indices = new IntVector("", allocator)) {
+ indices.setValueCount(vectorLength);
+ indexSorter.sort(vec, indices, intComparator);
+
+ for (int i = 0; i < vectorLength; i++) {
+ assertTrue(!indices.isNull(i));
+ assertEquals(i, indices.get(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testChoosePivot() {
+ final int vectorLength = 100;
+ try (IntVector vec = new IntVector("vector", allocator);
+ IntVector indices = new IntVector("indices", allocator)) {
+ vec.allocateNew(vectorLength);
+ indices.allocateNew(vectorLength);
+
+ // the vector is sorted, so the pivot should be in the middle
+ for (int i = 0; i < vectorLength; i++) {
+ vec.set(i, i * 100);
+ indices.set(i, i);
+ }
+ vec.setValueCount(vectorLength);
+ indices.setValueCount(vectorLength);
+
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ // setup internal data structures
+ comparator.attachVector(vec);
+
+ int low = 5;
+ int high = 6;
+ assertTrue(high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD);
+
+ // the range is small enough, so the pivot is simply selected as the low value
+ int pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator);
+ assertEquals(pivotIndex, low);
+ assertEquals(pivotIndex, indices.get(low));
+
+ low = 30;
+ high = 80;
+ assertTrue(high - low + 1 >= FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD);
+
+ // the range is large enough, so the median is selected as the pivot
+ pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator);
+ assertEquals(pivotIndex, (low + high) / 2);
+ assertEquals(pivotIndex, indices.get(low));
+ }
+ }
+
+ /**
+ * Evaluates choosing pivot for all possible permutations of 3 numbers.
+ */
+ @Test
+ public void testChoosePivotAllPermutes() {
+ try (IntVector vec = new IntVector("vector", allocator);
+ IntVector indices = new IntVector("indices", allocator)) {
+ vec.allocateNew();
+ indices.allocateNew();
+
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+ // setup internal data structures
+ comparator.attachVector(vec);
+ int low = 0;
+ int high = 2;
+
+ // test all the 6 permutations of 3 numbers
+ ValueVectorDataPopulator.setVector(indices, 0, 1, 2);
+ ValueVectorDataPopulator.setVector(vec, 11, 22, 33);
+ int pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator);
+ assertEquals(1, pivotIndex);
+ assertEquals(1, indices.get(low));
+
+ ValueVectorDataPopulator.setVector(indices, 0, 1, 2);
+ ValueVectorDataPopulator.setVector(vec, 11, 33, 22);
+ pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator);
+ assertEquals(2, pivotIndex);
+ assertEquals(2, indices.get(low));
+
+ ValueVectorDataPopulator.setVector(indices, 0, 1, 2);
+ ValueVectorDataPopulator.setVector(vec, 22, 11, 33);
+ pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator);
+ assertEquals(0, pivotIndex);
+ assertEquals(0, indices.get(low));
+
+ ValueVectorDataPopulator.setVector(indices, 0, 1, 2);
+ ValueVectorDataPopulator.setVector(vec, 22, 33, 11);
+ pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator);
+ assertEquals(0, pivotIndex);
+ assertEquals(0, indices.get(low));
+
+ ValueVectorDataPopulator.setVector(indices, 0, 1, 2);
+ ValueVectorDataPopulator.setVector(vec, 33, 11, 22);
+ pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator);
+ assertEquals(2, pivotIndex);
+ assertEquals(2, indices.get(low));
+
+ ValueVectorDataPopulator.setVector(indices, 0, 1, 2);
+ ValueVectorDataPopulator.setVector(vec, 33, 22, 11);
+ pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator);
+ assertEquals(1, pivotIndex);
+ assertEquals(1, indices.get(low));
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java
new file mode 100644
index 000000000..ba9c42913
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link InsertionSorter}.
+ */
+public class TestInsertionSorter {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ private static final int VECTOR_LENGTH = 10;
+
+ private void testSortIntVectorRange(int start, int end, int[] expected) {
+ try (IntVector vector = new IntVector("vector", allocator);
+ IntVector buffer = new IntVector("buffer", allocator)) {
+
+ buffer.allocateNew(1);
+
+ ValueVectorDataPopulator.setVector(vector, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ assertEquals(VECTOR_LENGTH, vector.getValueCount());
+
+ VectorValueComparator<IntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vector);
+ InsertionSorter.insertionSort(vector, start, end, comparator, buffer);
+
+ assertEquals(VECTOR_LENGTH, expected.length);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertFalse(vector.isNull(i));
+ assertEquals(expected[i], vector.get(i));
+ }
+ }
+ }
+
+ @Test
+ public void testSortIntVector() {
+ testSortIntVectorRange(2, 5, new int[] {9, 8, 4, 5, 6, 7, 3, 2, 1, 0});
+ testSortIntVectorRange(3, 7, new int[] {9, 8, 7, 2, 3, 4, 5, 6, 1, 0});
+ testSortIntVectorRange(3, 4, new int[] {9, 8, 7, 5, 6, 4, 3, 2, 1, 0});
+ testSortIntVectorRange(7, 7, new int[] {9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
+ testSortIntVectorRange(0, 5, new int[] {4, 5, 6, 7, 8, 9, 3, 2, 1, 0});
+ testSortIntVectorRange(8, 9, new int[] {9, 8, 7, 6, 5, 4, 3, 2, 0, 1});
+ testSortIntVectorRange(0, 9, new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
+ }
+
+ private void testSortIndicesRange(int start, int end, int[] expectedIndices) {
+ try (IntVector vector = new IntVector("vector", allocator);
+ IntVector indices = new IntVector("indices", allocator)) {
+
+ ValueVectorDataPopulator.setVector(vector, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ ValueVectorDataPopulator.setVector(indices, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+
+ assertEquals(VECTOR_LENGTH, vector.getValueCount());
+ assertEquals(VECTOR_LENGTH, indices.getValueCount());
+
+ VectorValueComparator<IntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vector);
+ comparator.attachVector(vector);
+
+ InsertionSorter.insertionSort(indices, start, end, comparator);
+
+ // verify results
+ assertEquals(VECTOR_LENGTH, expectedIndices.length);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ assertFalse(indices.isNull(i));
+ assertEquals(expectedIndices[i], indices.get(i));
+ }
+ }
+ }
+
+ @Test
+ public void testSortIndices() {
+ testSortIndicesRange(2, 5, new int[] {0, 1, 5, 4, 3, 2, 6, 7, 8, 9});
+ testSortIndicesRange(3, 7, new int[] {0, 1, 2, 7, 6, 5, 4, 3, 8, 9});
+ testSortIndicesRange(3, 4, new int[] {0, 1, 2, 4, 3, 5, 6, 7, 8, 9});
+ testSortIndicesRange(7, 7, new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
+ testSortIndicesRange(0, 5, new int[] {5, 4, 3, 2, 1, 0, 6, 7, 8, 9});
+ testSortIndicesRange(8, 9, new int[] {0, 1, 2, 3, 4, 5, 6, 7, 9, 8});
+ testSortIndicesRange(0, 9, new int[] {9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java
new file mode 100644
index 000000000..321ca226d
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link OffHeapIntStack}.
+ */
+public class TestOffHeapIntStack {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testPushPop() {
+ try (OffHeapIntStack stack = new OffHeapIntStack(allocator)) {
+ assertTrue(stack.isEmpty());
+
+ final int elemCount = 500;
+ for (int i = 0; i < elemCount; i++) {
+ stack.push(i);
+ assertEquals(i, stack.getTop());
+ }
+
+ assertEquals(elemCount, stack.getCount());
+
+ for (int i = 0; i < elemCount; i++) {
+ assertEquals(elemCount - i - 1, stack.getTop());
+ assertEquals(elemCount - i - 1, stack.pop());
+ }
+
+ assertTrue(stack.isEmpty());
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java
new file mode 100644
index 000000000..ea8655106
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.lang.reflect.Array;
+import java.util.Arrays;
+import java.util.Random;
+import java.util.function.BiConsumer;
+import java.util.function.Supplier;
+
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.testing.RandomDataGenerator;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+
+/**
+ * Utilities for sorting related utilities.
+ */
+public class TestSortingUtil {
+
+ static final Random random = new Random(0);
+
+ static final DataGenerator<TinyIntVector, Byte> TINY_INT_GENERATOR = new DataGenerator<>(
+ RandomDataGenerator.TINY_INT_GENERATOR,
+ (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Byte.class);
+
+ static final DataGenerator<SmallIntVector, Short> SMALL_INT_GENERATOR = new DataGenerator<>(
+ RandomDataGenerator.SMALL_INT_GENERATOR,
+ (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Short.class);
+
+ static final DataGenerator<IntVector, Integer> INT_GENERATOR = new DataGenerator<>(
+ RandomDataGenerator.INT_GENERATOR,
+ (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Integer.class);
+
+ static final DataGenerator<BigIntVector, Long> LONG_GENERATOR = new DataGenerator<>(
+ RandomDataGenerator.LONG_GENERATOR,
+ (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Long.class);
+
+ static final DataGenerator<Float4Vector, Float> FLOAT_GENERATOR = new DataGenerator<>(
+ RandomDataGenerator.FLOAT_GENERATOR,
+ (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Float.class);
+
+ static final DataGenerator<Float8Vector, Double> DOUBLE_GENERATOR = new DataGenerator<>(
+ RandomDataGenerator.DOUBLE_GENERATOR,
+ (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Double.class);
+
+ static final DataGenerator<VarCharVector, String> STRING_GENERATOR = new DataGenerator<>(
+ () -> {
+ int strLength = random.nextInt(20) + 1;
+ return generateRandomString(strLength);
+ },
+ (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), String.class);
+
+ private TestSortingUtil() {
+ }
+
+ /**
+ * Verify that a vector is equal to an array.
+ */
+ public static <V extends ValueVector, U> void verifyResults(V vector, U[] expected) {
+ assertEquals(vector.getValueCount(), expected.length);
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals(vector.getObject(i), expected[i]);
+ }
+ }
+
+ /**
+ * Sort an array with null values come first.
+ */
+ public static <U extends Comparable<U>> void sortArray(U[] array) {
+ Arrays.sort(array, (a, b) -> {
+ if (a == null || b == null) {
+ if (a == null && b == null) {
+ return 0;
+ }
+
+ // exactly one is null
+ if (a == null) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ return a.compareTo(b);
+ });
+ }
+
+ /**
+ * Generate a string with alphabetic characters only.
+ */
+ static String generateRandomString(int length) {
+ byte[] str = new byte[length];
+ final int lower = 'a';
+ final int upper = 'z';
+
+ for (int i = 0; i < length; i++) {
+ // make r non-negative
+ int r = random.nextInt() & Integer.MAX_VALUE;
+ str[i] = (byte) (r % (upper - lower + 1) + lower);
+ }
+
+ return new String(str);
+ }
+
+ /**
+ * Utility to generate data for testing.
+ * @param <V> vector type.
+ * @param <U> data element type.
+ */
+ static class DataGenerator<V extends ValueVector, U extends Comparable<U>> {
+
+ final Supplier<U> dataGenerator;
+
+ final BiConsumer<V, U[]> vectorPopulator;
+
+ final Class<U> clazz;
+
+ DataGenerator(
+ Supplier<U> dataGenerator, BiConsumer<V, U[]> vectorPopulator, Class<U> clazz) {
+ this.dataGenerator = dataGenerator;
+ this.vectorPopulator = vectorPopulator;
+ this.clazz = clazz;
+ }
+
+ /**
+ * Populate the vector according to the specified parameters.
+ * @param vector the vector to populate.
+ * @param length vector length.
+ * @param nullFraction the fraction of null values.
+ * @return An array with the same data as the vector.
+ */
+ U[] populate(V vector, int length, double nullFraction) {
+ U[] array = (U[]) Array.newInstance(clazz, length);
+ for (int i = 0; i < length; i++) {
+ double r = Math.random();
+ U value = r < nullFraction ? null : dataGenerator.get();
+ array[i] = value;
+ }
+ vectorPopulator.accept(vector, array);
+ return array;
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java
new file mode 100644
index 000000000..074193594
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link StableVectorComparator}.
+ */
+public class TestStableVectorComparator {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testCompare() {
+ try (VarCharVector vec = new VarCharVector("", allocator)) {
+ vec.allocateNew(100, 5);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, "ba".getBytes());
+ vec.set(1, "abc".getBytes());
+ vec.set(2, "aa".getBytes());
+ vec.set(3, "abc".getBytes());
+ vec.set(4, "a".getBytes());
+
+ VectorValueComparator<VarCharVector> comparator = new TestVarCharSorter();
+ VectorValueComparator<VarCharVector> stableComparator = new StableVectorComparator<>(comparator);
+ stableComparator.attachVector(vec);
+
+ assertTrue(stableComparator.compare(0, 1) > 0);
+ assertTrue(stableComparator.compare(1, 2) < 0);
+ assertTrue(stableComparator.compare(2, 3) < 0);
+ assertTrue(stableComparator.compare(1, 3) < 0);
+ assertTrue(stableComparator.compare(3, 1) > 0);
+ assertTrue(stableComparator.compare(3, 3) == 0);
+ }
+ }
+
+ @Test
+ public void testStableSortString() {
+ try (VarCharVector vec = new VarCharVector("", allocator)) {
+ vec.allocateNew(100, 10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, "a".getBytes());
+ vec.set(1, "abc".getBytes());
+ vec.set(2, "aa".getBytes());
+ vec.set(3, "a1".getBytes());
+ vec.set(4, "abcdefg".getBytes());
+ vec.set(5, "accc".getBytes());
+ vec.set(6, "afds".getBytes());
+ vec.set(7, "0".getBytes());
+ vec.set(8, "01".getBytes());
+ vec.set(9, "0c".getBytes());
+
+ // sort the vector
+ VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<VarCharVector> comparator = new TestVarCharSorter();
+ VectorValueComparator<VarCharVector> stableComparator = new StableVectorComparator<>(comparator);
+
+ try (VarCharVector sortedVec =
+ (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) {
+ sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount());
+ sortedVec.setLastSet(vec.getValueCount() - 1);
+ sortedVec.setValueCount(vec.getValueCount());
+
+ sorter.sortOutOfPlace(vec, sortedVec, stableComparator);
+
+ // verify results
+ // the results are stable
+ assertEquals("0", new String(sortedVec.get(0)));
+ assertEquals("01", new String(sortedVec.get(1)));
+ assertEquals("0c", new String(sortedVec.get(2)));
+ assertEquals("a", new String(sortedVec.get(3)));
+ assertEquals("abc", new String(sortedVec.get(4)));
+ assertEquals("aa", new String(sortedVec.get(5)));
+ assertEquals("a1", new String(sortedVec.get(6)));
+ assertEquals("abcdefg", new String(sortedVec.get(7)));
+ assertEquals("accc", new String(sortedVec.get(8)));
+ assertEquals("afds", new String(sortedVec.get(9)));
+ }
+ }
+ }
+
+ /**
+ * Utility comparator that compares varchars by the first character.
+ */
+ private static class TestVarCharSorter extends VectorValueComparator<VarCharVector> {
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ byte b1 = vector1.get(index1)[0];
+ byte b2 = vector2.get(index2)[0];
+ return b1 - b2;
+ }
+
+ @Override
+ public VectorValueComparator<VarCharVector> createNew() {
+ return new TestVarCharSorter();
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
new file mode 100644
index 000000000..46b306021
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link VariableWidthOutOfPlaceVectorSorter}.
+ */
+public class TestVariableWidthOutOfPlaceVectorSorter {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testSortString() {
+ try (VarCharVector vec = new VarCharVector("", allocator)) {
+ vec.allocateNew(100, 10);
+ vec.setValueCount(10);
+
+ // fill data to sort
+ vec.set(0, "hello".getBytes());
+ vec.set(1, "abc".getBytes());
+ vec.setNull(2);
+ vec.set(3, "world".getBytes());
+ vec.set(4, "12".getBytes());
+ vec.set(5, "dictionary".getBytes());
+ vec.setNull(6);
+ vec.set(7, "hello".getBytes());
+ vec.set(8, "good".getBytes());
+ vec.set(9, "yes".getBytes());
+
+ // sort the vector
+ VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<BaseVariableWidthVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
+
+ VarCharVector sortedVec =
+ (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+ sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount());
+ sortedVec.setLastSet(vec.getValueCount() - 1);
+ sortedVec.setValueCount(vec.getValueCount());
+
+ sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+ // verify results
+ Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+ Assert.assertEquals(vec.getByteCapacity(), sortedVec.getByteCapacity());
+ Assert.assertEquals(vec.getLastSet(), sortedVec.getLastSet());
+
+ assertTrue(sortedVec.isNull(0));
+ assertTrue(sortedVec.isNull(1));
+ assertEquals("12", new String(sortedVec.get(2)));
+ assertEquals("abc", new String(sortedVec.get(3)));
+ assertEquals("dictionary", new String(sortedVec.get(4)));
+ assertEquals("good", new String(sortedVec.get(5)));
+ assertEquals("hello", new String(sortedVec.get(6)));
+ assertEquals("hello", new String(sortedVec.get(7)));
+ assertEquals("world", new String(sortedVec.get(8)));
+ assertEquals("yes", new String(sortedVec.get(9)));
+
+ sortedVec.close();
+ }
+ }
+}
diff --git a/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java
new file mode 100644
index 000000000..068fe8b69
--- /dev/null
+++ b/src/arrow/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.function.Function;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.util.Text;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test sorting variable width vectors with random data.
+ */
+@RunWith(Parameterized.class)
+public class TestVariableWidthSorting<V extends BaseVariableWidthVector, U extends Comparable<U>> {
+
+ static final int[] VECTOR_LENGTHS = new int[] {2, 5, 10, 50, 100, 1000, 3000};
+
+ static final double[] NULL_FRACTIONS = {0, 0.1, 0.3, 0.5, 0.7, 0.9, 1};
+
+ private final int length;
+
+ private final double nullFraction;
+
+ private final Function<BufferAllocator, V> vectorGenerator;
+
+ private final TestSortingUtil.DataGenerator<V, U> dataGenerator;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ public TestVariableWidthSorting(
+ int length, double nullFraction, String desc,
+ Function<BufferAllocator, V> vectorGenerator, TestSortingUtil.DataGenerator<V, U> dataGenerator) {
+ this.length = length;
+ this.nullFraction = nullFraction;
+ this.vectorGenerator = vectorGenerator;
+ this.dataGenerator = dataGenerator;
+ }
+
+ @Test
+ public void testSort() {
+ sortOutOfPlace();
+ }
+
+ void sortOutOfPlace() {
+ try (V vector = vectorGenerator.apply(allocator)) {
+ U[] array = dataGenerator.populate(vector, length, nullFraction);
+ Arrays.sort(array, (Comparator<? super U>) new StringComparator());
+
+ // sort the vector
+ VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter();
+ VectorValueComparator<V> comparator = DefaultVectorComparators.createDefaultComparator(vector);
+
+ try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) {
+ int dataSize = vector.getOffsetBuffer().getInt(vector.getValueCount() * 4);
+ sortedVec.allocateNew(dataSize, vector.getValueCount());
+ sortedVec.setValueCount(vector.getValueCount());
+
+ sorter.sortOutOfPlace(vector, sortedVec, comparator);
+
+ // verify results
+ verifyResults(sortedVec, (String[]) array);
+ }
+ }
+ }
+
+ @Parameterized.Parameters(name = "length = {0}, null fraction = {1}, vector = {2}")
+ public static Collection<Object[]> getParameters() {
+ List<Object[]> params = new ArrayList<>();
+ for (int length : VECTOR_LENGTHS) {
+ for (double nullFrac : NULL_FRACTIONS) {
+ params.add(new Object[]{
+ length, nullFrac, "VarCharVector",
+ (Function<BufferAllocator, VarCharVector>) (allocator -> new VarCharVector("vector", allocator)),
+ TestSortingUtil.STRING_GENERATOR
+ });
+ }
+ }
+ return params;
+ }
+
+ /**
+ * Verify results as byte arrays.
+ */
+ public static <V extends ValueVector> void verifyResults(V vector, String[] expected) {
+ assertEquals(vector.getValueCount(), expected.length);
+ for (int i = 0; i < expected.length; i++) {
+ if (expected[i] == null) {
+ assertTrue(vector.isNull(i));
+ } else {
+ assertArrayEquals(((Text) vector.getObject(i)).getBytes(), expected[i].getBytes());
+ }
+ }
+ }
+
+ /**
+ * String comparator with the same behavior as that of
+ * {@link DefaultVectorComparators.VariableWidthComparator}.
+ */
+ static class StringComparator implements Comparator<String> {
+
+ @Override
+ public int compare(String str1, String str2) {
+ if (str1 == null || str2 == null) {
+ if (str1 == null && str2 == null) {
+ return 0;
+ }
+
+ return str1 == null ? -1 : 1;
+ }
+
+ byte[] bytes1 = str1.getBytes();
+ byte[] bytes2 = str2.getBytes();
+
+ for (int i = 0; i < bytes1.length && i < bytes2.length; i++) {
+ if (bytes1[i] != bytes2[i]) {
+ return (bytes1[i] & 0xff) < (bytes2[i] & 0xff) ? -1 : 1;
+ }
+ }
+ return bytes1.length - bytes2.length;
+ }
+ }
+}
diff --git a/src/arrow/java/api-changes.md b/src/arrow/java/api-changes.md
new file mode 100644
index 000000000..22003c335
--- /dev/null
+++ b/src/arrow/java/api-changes.md
@@ -0,0 +1,32 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Arrow Java API Changes
+
+This document tracks behavior changes to Java APIs, as listed below.
+
+- **[ARROW-5973](https://issues.apache.org/jira/browse/ARROW-5973)**:
+ * **Start date**: 2019/07/18
+ * **Resolve date**: 2019/07/20
+ * **Brief description**: The semantics of the get methods for [VarCharVector](./vector/scr/main/org/apache/arrow/vector/VarCharVector.java), [VarBinaryVector](./vector/scr/main/org/apache/arrow/vector/VarBinaryVector.java), and [FixedSizeBinaryVector](./vector/scr/main/org/apache/arrow/vector/FixedSizeBinaryVector.java) changes. In the past, if the validity bit is clear, the methods throw throws an IllegalStateException when NULL_CHECKING_ENABLED is set, or returns an empty object when the flag is not set. Now, the get methods return a null if the validity bit is clear.
+
+- **[ARROW-5842](https://issues.apache.org/jira/browse/ARROW-5842)**:
+ * **Start date**: 2019/07/04
+ * **Resolve date**: 2019/07/11
+ * **Brief description**: The semantics of lastSet member in class [ListVector](./vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java) changes. In the past, it refers to the next index that will be set. After the change it points to the last index that is actually set.
diff --git a/src/arrow/java/c/CMakeLists.txt b/src/arrow/java/c/CMakeLists.txt
new file mode 100644
index 000000000..1025f87af
--- /dev/null
+++ b/src/arrow/java/c/CMakeLists.txt
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_cdata_java
+#
+
+cmake_minimum_required(VERSION 3.11)
+message(STATUS "Building using CMake version: ${CMAKE_VERSION}")
+project(arrow_cdata_java)
+
+# Find java/jni
+include(UseJava)
+
+find_package(Java REQUIRED)
+find_package(JNI REQUIRED)
+
+set(JNI_HEADERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
+ ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR})
+
+add_jar(${PROJECT_NAME}
+ src/main/java/org/apache/arrow/c/jni/JniLoader.java
+ src/main/java/org/apache/arrow/c/jni/JniWrapper.java
+ src/main/java/org/apache/arrow/c/jni/PrivateData.java
+ GENERATE_NATIVE_HEADERS
+ arrow_cdata_java-native
+ DESTINATION
+ ${JNI_HEADERS_DIR})
+
+set(SOURCES src/main/cpp/jni_wrapper.cc)
+add_library(arrow_cdata_jni SHARED ${SOURCES})
+target_link_libraries(arrow_cdata_jni ${JAVA_JVM_LIBRARY})
+add_dependencies(arrow_cdata_jni ${PROJECT_NAME})
+
+install(TARGETS arrow_cdata_jni DESTINATION ${CMAKE_INSTALL_LIBDIR})
diff --git a/src/arrow/java/c/README.md b/src/arrow/java/c/README.md
new file mode 100644
index 000000000..ce73f531c
--- /dev/null
+++ b/src/arrow/java/c/README.md
@@ -0,0 +1,54 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# C Interfaces for Arrow Java
+
+## Setup Build Environment
+
+install:
+ - Java 8 or later
+ - Maven 3.3 or later
+ - A C++11-enabled compiler
+ - CMake 3.11 or later
+ - Make or ninja build utilities
+
+## Building JNI wrapper shared library
+
+```
+mkdir -p build
+pushd build
+cmake ..
+cmake --build .
+popd
+```
+
+## Building and running tests
+
+Run tests with
+
+```
+mvn test
+```
+
+To install Apache Arrow (Java) with this module enabled run the following from the project root directory:
+
+```
+cd java
+mvn -Parrow-c-data install
+```
diff --git a/src/arrow/java/c/pom.xml b/src/arrow/java/c/pom.xml
new file mode 100644
index 000000000..901b084fd
--- /dev/null
+++ b/src/arrow/java/c/pom.xml
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>arrow-java-root</artifactId>
+ <groupId>org.apache.arrow</groupId>
+ <version>6.0.1</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>arrow-c-data</artifactId>
+ <name>Arrow Java C Data Interface</name>
+ <description>Java implementation of C Data Interface</description>
+ <packaging>jar</packaging>
+ <properties>
+ <arrow.c.jni.dist.dir>./build</arrow.c.jni.dist.dir>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-unsafe</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>${dep.guava.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <resources>
+ <resource>
+ <directory>${arrow.c.jni.dist.dir}</directory>
+ <includes>
+ <include>**/*arrow_cdata_jni.*</include>
+ </includes>
+ </resource>
+ </resources>
+ </build>
+
+</project>
diff --git a/src/arrow/java/c/src/main/cpp/abi.h b/src/arrow/java/c/src/main/cpp/abi.h
new file mode 100644
index 000000000..a78170dbd
--- /dev/null
+++ b/src/arrow/java/c/src/main/cpp/abi.h
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ARROW_FLAG_DICTIONARY_ORDERED 1
+#define ARROW_FLAG_NULLABLE 2
+#define ARROW_FLAG_MAP_KEYS_SORTED 4
+
+struct ArrowSchema {
+ // Array type description
+ const char* format;
+ const char* name;
+ const char* metadata;
+ int64_t flags;
+ int64_t n_children;
+ struct ArrowSchema** children;
+ struct ArrowSchema* dictionary;
+
+ // Release callback
+ void (*release)(struct ArrowSchema*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+struct ArrowArray {
+ // Array data description
+ int64_t length;
+ int64_t null_count;
+ int64_t offset;
+ int64_t n_buffers;
+ int64_t n_children;
+ const void** buffers;
+ struct ArrowArray** children;
+ struct ArrowArray* dictionary;
+
+ // Release callback
+ void (*release)(struct ArrowArray*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+// EXPERIMENTAL: C stream interface
+
+struct ArrowArrayStream {
+ // Callback to get the stream type
+ // (will be the same for all arrays in the stream).
+ //
+ // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+ //
+ // If successful, the ArrowSchema must be released independently from the stream.
+ int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
+
+ // Callback to get the next array
+ // (if no error and the array is released, the stream has ended)
+ //
+ // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+ //
+ // If successful, the ArrowArray must be released independently from the stream.
+ int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
+
+ // Callback to get optional detailed error information.
+ // This must only be called if the last stream operation failed
+ // with a non-0 return code.
+ //
+ // Return value: pointer to a null-terminated character array describing
+ // the last error, or NULL if no description is available.
+ //
+ // The returned pointer is only valid until the next operation on this stream
+ // (including release).
+ const char* (*get_last_error)(struct ArrowArrayStream*);
+
+ // Release callback: release the stream's own resources.
+ // Note that arrays returned by `get_next` must be individually released.
+ void (*release)(struct ArrowArrayStream*);
+
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/arrow/java/c/src/main/cpp/jni_wrapper.cc b/src/arrow/java/c/src/main/cpp/jni_wrapper.cc
new file mode 100644
index 000000000..cfb0af9bc
--- /dev/null
+++ b/src/arrow/java/c/src/main/cpp/jni_wrapper.cc
@@ -0,0 +1,263 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <jni.h>
+
+#include <cassert>
+#include <memory>
+#include <stdexcept>
+#include <string>
+
+#include "./abi.h"
+#include "org_apache_arrow_c_jni_JniWrapper.h"
+
+namespace {
+
+jclass CreateGlobalClassReference(JNIEnv* env, const char* class_name) {
+ jclass local_class = env->FindClass(class_name);
+ jclass global_class = (jclass)env->NewGlobalRef(local_class);
+ env->DeleteLocalRef(local_class);
+ return global_class;
+}
+
+jclass illegal_access_exception_class;
+jclass illegal_argument_exception_class;
+jclass runtime_exception_class;
+jclass private_data_class;
+
+jmethodID private_data_close_method;
+
+jint JNI_VERSION = JNI_VERSION_1_6;
+
+class JniPendingException : public std::runtime_error {
+ public:
+ explicit JniPendingException(const std::string& arg) : std::runtime_error(arg) {}
+};
+
+void ThrowPendingException(const std::string& message) {
+ throw JniPendingException(message);
+}
+
+void JniThrow(std::string message) { ThrowPendingException(message); }
+
+jmethodID GetMethodID(JNIEnv* env, jclass this_class, const char* name, const char* sig) {
+ jmethodID ret = env->GetMethodID(this_class, name, sig);
+ if (ret == nullptr) {
+ std::string error_message = "Unable to find method " + std::string(name) +
+ " within signature " + std::string(sig);
+ ThrowPendingException(error_message);
+ }
+ return ret;
+}
+
+class InnerPrivateData {
+ public:
+ InnerPrivateData(JavaVM* vm, jobject private_data)
+ : vm_(vm), j_private_data_(private_data) {}
+
+ JavaVM* vm_;
+ jobject j_private_data_;
+};
+
+class JNIEnvGuard {
+ public:
+ explicit JNIEnvGuard(JavaVM* vm) : vm_(vm), should_detach_(false) {
+ JNIEnv* env;
+ jint code = vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION);
+ if (code == JNI_EDETACHED) {
+ JavaVMAttachArgs args;
+ args.version = JNI_VERSION;
+ args.name = NULL;
+ args.group = NULL;
+ code = vm->AttachCurrentThread(reinterpret_cast<void**>(&env), &args);
+ should_detach_ = (code == JNI_OK);
+ }
+ if (code != JNI_OK) {
+ ThrowPendingException("Failed to attach the current thread to a Java VM");
+ }
+ env_ = env;
+ }
+
+ JNIEnv* env() { return env_; }
+
+ ~JNIEnvGuard() {
+ if (should_detach_) {
+ vm_->DetachCurrentThread();
+ should_detach_ = false;
+ }
+ }
+
+ private:
+ bool should_detach_;
+ JavaVM* vm_;
+ JNIEnv* env_;
+};
+
+template <typename T>
+void release_exported(T* base) {
+ // This should not be called on already released structure
+ assert(base->release != nullptr);
+
+ // Release children
+ for (int64_t i = 0; i < base->n_children; ++i) {
+ T* child = base->children[i];
+ if (child->release != nullptr) {
+ child->release(child);
+ assert(child->release == nullptr);
+ }
+ }
+
+ // Release dictionary
+ T* dict = base->dictionary;
+ if (dict != nullptr && dict->release != nullptr) {
+ dict->release(dict);
+ assert(dict->release == nullptr);
+ }
+
+ // Release all data directly owned by the struct
+ InnerPrivateData* private_data =
+ reinterpret_cast<InnerPrivateData*>(base->private_data);
+
+ JNIEnvGuard guard(private_data->vm_);
+ JNIEnv* env = guard.env();
+
+ env->CallObjectMethod(private_data->j_private_data_, private_data_close_method);
+ if (env->ExceptionCheck()) {
+ env->ExceptionDescribe();
+ env->ExceptionClear();
+ ThrowPendingException("Error calling close of private data");
+ }
+ env->DeleteGlobalRef(private_data->j_private_data_);
+ delete private_data;
+ base->private_data = nullptr;
+
+ // Mark released
+ base->release = nullptr;
+}
+} // namespace
+
+#define JNI_METHOD_START try {
+// macro ended
+
+#define JNI_METHOD_END(fallback_expr) \
+ } \
+ catch (JniPendingException & e) { \
+ env->ThrowNew(runtime_exception_class, e.what()); \
+ return fallback_expr; \
+ }
+// macro ended
+
+jint JNI_OnLoad(JavaVM* vm, void* reserved) {
+ JNIEnv* env;
+ if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION) != JNI_OK) {
+ return JNI_ERR;
+ }
+ JNI_METHOD_START
+ illegal_access_exception_class =
+ CreateGlobalClassReference(env, "Ljava/lang/IllegalAccessException;");
+ illegal_argument_exception_class =
+ CreateGlobalClassReference(env, "Ljava/lang/IllegalArgumentException;");
+ runtime_exception_class =
+ CreateGlobalClassReference(env, "Ljava/lang/RuntimeException;");
+ private_data_class =
+ CreateGlobalClassReference(env, "Lorg/apache/arrow/c/jni/PrivateData;");
+
+ private_data_close_method = GetMethodID(env, private_data_class, "close", "()V");
+
+ return JNI_VERSION;
+ JNI_METHOD_END(JNI_ERR)
+}
+
+void JNI_OnUnload(JavaVM* vm, void* reserved) {
+ JNIEnv* env;
+ vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION);
+ env->DeleteGlobalRef(illegal_access_exception_class);
+ env->DeleteGlobalRef(illegal_argument_exception_class);
+ env->DeleteGlobalRef(runtime_exception_class);
+}
+
+/*
+ * Class: org_apache_arrow_c_jni_JniWrapper
+ * Method: releaseSchema
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_releaseSchema(
+ JNIEnv* env, jobject, jlong address) {
+ JNI_METHOD_START
+ ArrowSchema* schema = reinterpret_cast<ArrowSchema*>(address);
+ if (schema->release != nullptr) {
+ schema->release(schema);
+ }
+ JNI_METHOD_END()
+}
+
+/*
+ * Class: org_apache_arrow_c_jni_JniWrapper
+ * Method: releaseArray
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL
+Java_org_apache_arrow_c_jni_JniWrapper_releaseArray(JNIEnv* env, jobject, jlong address) {
+ JNI_METHOD_START
+ ArrowArray* array = reinterpret_cast<ArrowArray*>(address);
+ if (array->release != nullptr) {
+ array->release(array);
+ }
+ JNI_METHOD_END()
+}
+
+/*
+ * Class: org_apache_arrow_c_jni_JniWrapper
+ * Method: exportSchema
+ * Signature: (JLorg/apache/arrow/c/jni/PrivateData;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_exportSchema(
+ JNIEnv* env, jobject, jlong address, jobject private_data) {
+ JNI_METHOD_START
+ ArrowSchema* schema = reinterpret_cast<ArrowSchema*>(address);
+
+ JavaVM* vm;
+ if (env->GetJavaVM(&vm) != JNI_OK) {
+ JniThrow("Unable to get JavaVM instance");
+ }
+ jobject private_data_ref = env->NewGlobalRef(private_data);
+
+ schema->private_data = new InnerPrivateData(vm, private_data_ref);
+ schema->release = &release_exported<ArrowSchema>;
+ JNI_METHOD_END()
+}
+
+/*
+ * Class: org_apache_arrow_c_jni_JniWrapper
+ * Method: exportArray
+ * Signature: (JLorg/apache/arrow/c/jni/PrivateData;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_exportArray(
+ JNIEnv* env, jobject, jlong address, jobject private_data) {
+ JNI_METHOD_START
+ ArrowArray* array = reinterpret_cast<ArrowArray*>(address);
+
+ JavaVM* vm;
+ if (env->GetJavaVM(&vm) != JNI_OK) {
+ JniThrow("Unable to get JavaVM instance");
+ }
+ jobject private_data_ref = env->NewGlobalRef(private_data);
+
+ array->private_data = new InnerPrivateData(vm, private_data_ref);
+ array->release = &release_exported<ArrowArray>;
+ JNI_METHOD_END()
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java
new file mode 100644
index 000000000..d6479a3ba
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.apache.arrow.c.NativeUtil.NULL;
+import static org.apache.arrow.c.NativeUtil.addressOrNull;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.c.jni.JniWrapper;
+import org.apache.arrow.c.jni.PrivateData;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+
+/**
+ * Exporter for {@link ArrowArray}.
+ */
+final class ArrayExporter {
+ private final BufferAllocator allocator;
+
+ public ArrayExporter(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ /**
+ * Private data structure for exported arrays.
+ */
+ static class ExportedArrayPrivateData implements PrivateData {
+ ArrowBuf buffers_ptrs;
+ List<ArrowBuf> buffers;
+ ArrowBuf children_ptrs;
+ List<ArrowArray> children;
+ ArrowArray dictionary;
+
+ @Override
+ public void close() {
+ NativeUtil.closeBuffer(buffers_ptrs);
+
+ if (buffers != null) {
+ for (ArrowBuf buffer : buffers) {
+ NativeUtil.closeBuffer(buffer);
+ }
+ }
+ NativeUtil.closeBuffer(children_ptrs);
+
+ if (children != null) {
+ for (ArrowArray child : children) {
+ child.close();
+ }
+ }
+
+ if (dictionary != null) {
+ dictionary.close();
+ }
+ }
+ }
+
+ void export(ArrowArray array, FieldVector vector, DictionaryProvider dictionaryProvider) {
+ List<FieldVector> children = vector.getChildrenFromFields();
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+ int valueCount = vector.getValueCount();
+ int nullCount = vector.getNullCount();
+ DictionaryEncoding dictionaryEncoding = vector.getField().getDictionary();
+
+ ExportedArrayPrivateData data = new ExportedArrayPrivateData();
+ try {
+ if (children != null) {
+ data.children = new ArrayList<>(children.size());
+ data.children_ptrs = allocator.buffer((long) children.size() * Long.BYTES);
+ for (int i = 0; i < children.size(); i++) {
+ ArrowArray child = ArrowArray.allocateNew(allocator);
+ data.children.add(child);
+ data.children_ptrs.writeLong(child.memoryAddress());
+ }
+ }
+
+ if (buffers != null) {
+ data.buffers = new ArrayList<>(buffers.size());
+ data.buffers_ptrs = allocator.buffer((long) buffers.size() * Long.BYTES);
+ for (ArrowBuf arrowBuf : buffers) {
+ if (arrowBuf != null) {
+ arrowBuf.getReferenceManager().retain();
+ data.buffers_ptrs.writeLong(arrowBuf.memoryAddress());
+ } else {
+ data.buffers_ptrs.writeLong(NULL);
+ }
+ data.buffers.add(arrowBuf);
+ }
+ }
+
+ if (dictionaryEncoding != null) {
+ Dictionary dictionary = dictionaryProvider.lookup(dictionaryEncoding.getId());
+ checkNotNull(dictionary, "Dictionary lookup failed on export of dictionary encoded array");
+
+ data.dictionary = ArrowArray.allocateNew(allocator);
+ FieldVector dictionaryVector = dictionary.getVector();
+ export(data.dictionary, dictionaryVector, dictionaryProvider);
+ }
+
+ ArrowArray.Snapshot snapshot = new ArrowArray.Snapshot();
+ snapshot.length = valueCount;
+ snapshot.null_count = nullCount;
+ snapshot.offset = 0;
+ snapshot.n_buffers = (data.buffers != null) ? data.buffers.size() : 0;
+ snapshot.n_children = (data.children != null) ? data.children.size() : 0;
+ snapshot.buffers = addressOrNull(data.buffers_ptrs);
+ snapshot.children = addressOrNull(data.children_ptrs);
+ snapshot.dictionary = addressOrNull(data.dictionary);
+ snapshot.release = NULL;
+ array.save(snapshot);
+
+ // sets release and private data
+ JniWrapper.get().exportArray(array.memoryAddress(), data);
+ } catch (Exception e) {
+ data.close();
+ throw e;
+ }
+
+ // Export children
+ if (children != null) {
+ for (int i = 0; i < children.size(); i++) {
+ FieldVector childVector = children.get(i);
+ ArrowArray child = data.children.get(i);
+ export(child, childVector, dictionaryProvider);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java
new file mode 100644
index 000000000..e82cef6a8
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.apache.arrow.c.NativeUtil.NULL;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+import static org.apache.arrow.util.Preconditions.checkState;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.TypeLayout;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+
+/**
+ * Importer for {@link ArrowArray}.
+ */
+final class ArrayImporter {
+ private static final int MAX_IMPORT_RECURSION_LEVEL = 64;
+
+ private final BufferAllocator allocator;
+ private final FieldVector vector;
+ private final DictionaryProvider dictionaryProvider;
+
+ private CDataReferenceManager referenceManager;
+ private int recursionLevel;
+
+ ArrayImporter(BufferAllocator allocator, FieldVector vector, DictionaryProvider dictionaryProvider) {
+ this.allocator = allocator;
+ this.vector = vector;
+ this.dictionaryProvider = dictionaryProvider;
+ }
+
+ void importArray(ArrowArray src) {
+ ArrowArray.Snapshot snapshot = src.snapshot();
+ checkState(snapshot.release != NULL, "Cannot import released ArrowArray");
+
+ // Move imported array
+ ArrowArray ownedArray = ArrowArray.allocateNew(allocator);
+ ownedArray.save(snapshot);
+ src.markReleased();
+ src.close();
+
+ recursionLevel = 0;
+
+ // This keeps the array alive as long as there are any buffers that need it
+ referenceManager = new CDataReferenceManager(ownedArray);
+ try {
+ referenceManager.increment();
+ doImport(snapshot);
+ } finally {
+ referenceManager.release();
+ }
+ }
+
+ private void importChild(ArrayImporter parent, ArrowArray src) {
+ ArrowArray.Snapshot snapshot = src.snapshot();
+ checkState(snapshot.release != NULL, "Cannot import released ArrowArray");
+ recursionLevel = parent.recursionLevel + 1;
+ checkState(recursionLevel <= MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowArray struct exceeded");
+ // Child buffers will keep the entire parent import alive.
+ // Perhaps we can move the child structs on import,
+ // but that is another level of complication.
+ referenceManager = parent.referenceManager;
+ doImport(snapshot);
+ }
+
+ private void doImport(ArrowArray.Snapshot snapshot) {
+ // First import children (required for reconstituting parent array data)
+ long[] children = NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children));
+ if (children != null && children.length > 0) {
+ List<FieldVector> childVectors = vector.getChildrenFromFields();
+ checkState(children.length == childVectors.size(), "ArrowArray struct has %s children (expected %s)",
+ children.length, childVectors.size());
+ for (int i = 0; i < children.length; i++) {
+ checkState(children[i] != NULL, "ArrowArray struct has NULL child at position %s", i);
+ ArrayImporter childImporter = new ArrayImporter(allocator, childVectors.get(i), dictionaryProvider);
+ childImporter.importChild(this, ArrowArray.wrap(children[i]));
+ }
+ }
+
+ // Handle import of a dictionary encoded vector
+ if (snapshot.dictionary != NULL) {
+ DictionaryEncoding encoding = vector.getField().getDictionary();
+ checkNotNull(encoding, "Missing encoding on import of ArrowArray with dictionary");
+
+ Dictionary dictionary = dictionaryProvider.lookup(encoding.getId());
+ checkNotNull(dictionary, "Dictionary lookup failed on import of ArrowArray with dictionary");
+
+ // reset the dictionary vector to the initial state
+ dictionary.getVector().clear();
+
+ ArrayImporter dictionaryImporter = new ArrayImporter(allocator, dictionary.getVector(), dictionaryProvider);
+ dictionaryImporter.importChild(this, ArrowArray.wrap(snapshot.dictionary));
+ }
+
+ // Import main data
+ ArrowFieldNode fieldNode = new ArrowFieldNode(snapshot.length, snapshot.null_count);
+ List<ArrowBuf> buffers = importBuffers(snapshot);
+ try {
+ vector.loadFieldBuffers(fieldNode, buffers);
+ } catch (RuntimeException e) {
+ throw new IllegalArgumentException(
+ "Could not load buffers for field " + vector.getField() + ". error message: " + e.getMessage(), e);
+ }
+ }
+
+ private List<ArrowBuf> importBuffers(ArrowArray.Snapshot snapshot) {
+ long[] buffers = NativeUtil.toJavaArray(snapshot.buffers, checkedCastToInt(snapshot.n_buffers));
+ if (buffers == null || buffers.length == 0) {
+ return new ArrayList<>();
+ }
+
+ int buffersCount = TypeLayout.getTypeBufferCount(vector.getField().getType());
+ checkState(buffers.length == buffersCount, "Expected %s buffers for imported type %s, ArrowArray struct has %s",
+ buffersCount, vector.getField().getType().getTypeID(), buffers.length);
+
+ List<ArrowBuf> result = new ArrayList<>(buffersCount);
+ for (long bufferPtr : buffers) {
+ ArrowBuf buffer = null;
+ if (bufferPtr != NULL) {
+ // TODO(roee88): an API for getting the size for each buffer is not yet
+ // available
+ buffer = new ArrowBuf(referenceManager, null, Integer.MAX_VALUE, bufferPtr);
+ }
+ result.add(buffer);
+ }
+ return result;
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrowArray.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrowArray.java
new file mode 100644
index 000000000..99fe0432c
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrowArray.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.apache.arrow.c.NativeUtil.NULL;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.apache.arrow.c.jni.JniWrapper;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.memory.util.MemoryUtil;
+
+/**
+ * C Data Interface ArrowArray.
+ * <p>
+ * Represents a wrapper for the following C structure:
+ *
+ * <pre>
+ * struct ArrowArray {
+ * // Array data description
+ * int64_t length;
+ * int64_t null_count;
+ * int64_t offset;
+ * int64_t n_buffers;
+ * int64_t n_children;
+ * const void** buffers;
+ * struct ArrowArray** children;
+ * struct ArrowArray* dictionary;
+ *
+ * // Release callback
+ * void (*release)(struct ArrowArray*);
+ * // Opaque producer-specific data
+ * void* private_data;
+ * };
+ * </pre>
+ */
+public class ArrowArray implements BaseStruct {
+ private static final int SIZE_OF = 80;
+ private static final int INDEX_RELEASE_CALLBACK = 64;
+
+ private ArrowBuf data;
+
+ /**
+ * Snapshot of the ArrowArray raw data.
+ */
+ public static class Snapshot {
+ public long length;
+ public long null_count;
+ public long offset;
+ public long n_buffers;
+ public long n_children;
+ public long buffers;
+ public long children;
+ public long dictionary;
+ public long release;
+ public long private_data;
+
+ /**
+ * Initialize empty ArrowArray snapshot.
+ */
+ public Snapshot() {
+ length = NULL;
+ null_count = NULL;
+ offset = NULL;
+ n_buffers = NULL;
+ n_children = NULL;
+ buffers = NULL;
+ children = NULL;
+ dictionary = NULL;
+ release = NULL;
+ private_data = NULL;
+ }
+ }
+
+ /**
+ * Create ArrowArray from an existing memory address.
+ * <p>
+ * The resulting ArrowArray does not own the memory.
+ *
+ * @param memoryAddress Memory address to wrap
+ * @return A new ArrowArray instance
+ */
+ public static ArrowArray wrap(long memoryAddress) {
+ return new ArrowArray(new ArrowBuf(ReferenceManager.NO_OP, null, ArrowArray.SIZE_OF, memoryAddress));
+ }
+
+ /**
+ * Create ArrowArray by allocating memory.
+ * <p>
+ * The resulting ArrowArray owns the memory.
+ *
+ * @param allocator Allocator for memory allocations
+ * @return A new ArrowArray instance
+ */
+ public static ArrowArray allocateNew(BufferAllocator allocator) {
+ ArrowArray array = new ArrowArray(allocator.buffer(ArrowArray.SIZE_OF));
+ array.markReleased();
+ return array;
+ }
+
+ ArrowArray(ArrowBuf data) {
+ checkNotNull(data, "ArrowArray initialized with a null buffer");
+ this.data = data;
+ }
+
+ /**
+ * Mark the array as released.
+ */
+ public void markReleased() {
+ directBuffer().putLong(INDEX_RELEASE_CALLBACK, NULL);
+ }
+
+ @Override
+ public long memoryAddress() {
+ checkNotNull(data, "ArrowArray is already closed");
+ return data.memoryAddress();
+ }
+
+ @Override
+ public void release() {
+ long address = memoryAddress();
+ JniWrapper.get().releaseArray(address);
+ }
+
+ @Override
+ public void close() {
+ if (data != null) {
+ data.close();
+ data = null;
+ }
+ }
+
+ private ByteBuffer directBuffer() {
+ return MemoryUtil.directBuffer(memoryAddress(), ArrowArray.SIZE_OF).order(ByteOrder.nativeOrder());
+ }
+
+ /**
+ * Take a snapshot of the ArrowArray raw values.
+ *
+ * @return snapshot
+ */
+ public Snapshot snapshot() {
+ ByteBuffer data = directBuffer();
+ Snapshot snapshot = new Snapshot();
+ snapshot.length = data.getLong();
+ snapshot.null_count = data.getLong();
+ snapshot.offset = data.getLong();
+ snapshot.n_buffers = data.getLong();
+ snapshot.n_children = data.getLong();
+ snapshot.buffers = data.getLong();
+ snapshot.children = data.getLong();
+ snapshot.dictionary = data.getLong();
+ snapshot.release = data.getLong();
+ snapshot.private_data = data.getLong();
+ return snapshot;
+ }
+
+ /**
+ * Write values from Snapshot to the underlying ArrowArray memory buffer.
+ */
+ public void save(Snapshot snapshot) {
+ directBuffer().putLong(snapshot.length).putLong(snapshot.null_count).putLong(snapshot.offset)
+ .putLong(snapshot.n_buffers).putLong(snapshot.n_children).putLong(snapshot.buffers).putLong(snapshot.children)
+ .putLong(snapshot.dictionary).putLong(snapshot.release).putLong(snapshot.private_data);
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java
new file mode 100644
index 000000000..b34ce7d5a
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.apache.arrow.c.NativeUtil.NULL;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.apache.arrow.c.jni.JniWrapper;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.memory.util.MemoryUtil;
+
+/**
+ * C Data Interface ArrowSchema.
+ * <p>
+ * Represents a wrapper for the following C structure:
+ *
+ * <pre>
+ * struct ArrowSchema {
+ * // Array type description
+ * const char* format;
+ * const char* name;
+ * const char* metadata;
+ * int64_t flags;
+ * int64_t n_children;
+ * struct ArrowSchema** children;
+ * struct ArrowSchema* dictionary;
+ *
+ * // Release callback
+ * void (*release)(struct ArrowSchema*);
+ * // Opaque producer-specific data
+ * void* private_data;
+ * };
+ * </pre>
+ */
+public class ArrowSchema implements BaseStruct {
+ private static final int SIZE_OF = 72;
+
+ private ArrowBuf data;
+
+ /**
+ * Snapshot of the ArrowSchema raw data.
+ */
+ public static class Snapshot {
+ public long format;
+ public long name;
+ public long metadata;
+ public long flags;
+ public long n_children;
+ public long children;
+ public long dictionary;
+ public long release;
+ public long private_data;
+
+ /**
+ * Initialize empty ArrowSchema snapshot.
+ */
+ public Snapshot() {
+ format = NULL;
+ name = NULL;
+ metadata = NULL;
+ flags = NULL;
+ n_children = NULL;
+ children = NULL;
+ dictionary = NULL;
+ release = NULL;
+ private_data = NULL;
+ }
+ }
+
+ /**
+ * Create ArrowSchema from an existing memory address.
+ * <p>
+ * The resulting ArrowSchema does not own the memory.
+ *
+ * @param memoryAddress Memory address to wrap
+ * @return A new ArrowSchema instance
+ */
+ public static ArrowSchema wrap(long memoryAddress) {
+ return new ArrowSchema(new ArrowBuf(ReferenceManager.NO_OP, null, ArrowSchema.SIZE_OF, memoryAddress));
+ }
+
+ /**
+ * Create ArrowSchema by allocating memory.
+ * <p>
+ * The resulting ArrowSchema owns the memory.
+ *
+ * @param allocator Allocator for memory allocations
+ * @return A new ArrowSchema instance
+ */
+ public static ArrowSchema allocateNew(BufferAllocator allocator) {
+ return new ArrowSchema(allocator.buffer(ArrowSchema.SIZE_OF));
+ }
+
+ ArrowSchema(ArrowBuf data) {
+ checkNotNull(data, "ArrowSchema initialized with a null buffer");
+ this.data = data;
+ }
+
+ @Override
+ public long memoryAddress() {
+ checkNotNull(data, "ArrowSchema is already closed");
+ return data.memoryAddress();
+ }
+
+ @Override
+ public void release() {
+ long address = memoryAddress();
+ JniWrapper.get().releaseSchema(address);
+ }
+
+ @Override
+ public void close() {
+ if (data != null) {
+ data.close();
+ data = null;
+ }
+ }
+
+ private ByteBuffer directBuffer() {
+ return MemoryUtil.directBuffer(memoryAddress(), ArrowSchema.SIZE_OF).order(ByteOrder.nativeOrder());
+ }
+
+ /**
+ * Take a snapshot of the ArrowSchema raw values.
+ *
+ * @return snapshot
+ */
+ public Snapshot snapshot() {
+ ByteBuffer data = directBuffer();
+ Snapshot snapshot = new Snapshot();
+ snapshot.format = data.getLong();
+ snapshot.name = data.getLong();
+ snapshot.metadata = data.getLong();
+ snapshot.flags = data.getLong();
+ snapshot.n_children = data.getLong();
+ snapshot.children = data.getLong();
+ snapshot.dictionary = data.getLong();
+ snapshot.release = data.getLong();
+ snapshot.private_data = data.getLong();
+ return snapshot;
+ }
+
+ /**
+ * Write values from Snapshot to the underlying ArrowSchema memory buffer.
+ */
+ public void save(Snapshot snapshot) {
+ directBuffer().putLong(snapshot.format).putLong(snapshot.name).putLong(snapshot.metadata).putLong(snapshot.flags)
+ .putLong(snapshot.n_children).putLong(snapshot.children).putLong(snapshot.dictionary).putLong(snapshot.release)
+ .putLong(snapshot.private_data);
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/BaseStruct.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/BaseStruct.java
new file mode 100644
index 000000000..d90fe8175
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/BaseStruct.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+/**
+ * Base interface for C Data Interface structures.
+ */
+public interface BaseStruct extends AutoCloseable {
+ /**
+ * Get memory address.
+ *
+ * @return Memory address
+ */
+ long memoryAddress();
+
+ /**
+ * Call the release callback of an ArrowArray.
+ * <p>
+ * This function must not be called for child arrays.
+ */
+ void release();
+
+ /**
+ * Close to release the main buffer.
+ */
+ @Override
+ void close();
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java
new file mode 100644
index 000000000..43bcda276
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+
+/**
+ * A DictionaryProvider that is used in C Data Interface for imports.
+ * <p>
+ * CDataDictionaryProvider is similar to
+ * {@link DictionaryProvider.MapDictionaryProvider} with a key difference that
+ * the dictionaries are owned by the provider so it must eventually be closed.
+ * <p>
+ * The typical usage is to create the CDataDictionaryProvider and pass it to
+ * {@link Data#importField} or {@link Data#importSchema} to allocate empty
+ * dictionaries based on the information in {@link ArrowSchema}. Then you can
+ * re-use the same dictionary provider in any function that imports an
+ * {@link ArrowArray} that has the same schema.
+ */
+public class CDataDictionaryProvider implements DictionaryProvider, AutoCloseable {
+
+ private final Map<Long, Dictionary> map;
+
+ public CDataDictionaryProvider() {
+ this.map = new HashMap<>();
+ }
+
+ void put(Dictionary dictionary) {
+ Dictionary previous = map.put(dictionary.getEncoding().getId(), dictionary);
+ if (previous != null) {
+ previous.getVector().close();
+ }
+ }
+
+ public final Set<Long> getDictionaryIds() {
+ return map.keySet();
+ }
+
+ @Override
+ public Dictionary lookup(long id) {
+ return map.get(id);
+ }
+
+ @Override
+ public void close() {
+ for (Dictionary dictionary : map.values()) {
+ dictionary.getVector().close();
+ }
+ map.clear();
+ }
+
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/CDataReferenceManager.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/CDataReferenceManager.java
new file mode 100644
index 000000000..c5c2f9779
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/CDataReferenceManager.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OwnershipTransferResult;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * A ReferenceManager implementation that holds a
+ * {@link org.apache.arrow.c.BaseStruct}.
+ * <p>
+ * A reference count is maintained and once it reaches zero the struct is
+ * released (as per the C data interface specification) and closed.
+ */
+final class CDataReferenceManager implements ReferenceManager {
+ private final AtomicInteger bufRefCnt = new AtomicInteger(0);
+
+ private final BaseStruct struct;
+
+ CDataReferenceManager(BaseStruct struct) {
+ this.struct = struct;
+ }
+
+ @Override
+ public int getRefCount() {
+ return bufRefCnt.get();
+ }
+
+ @Override
+ public boolean release() {
+ return release(1);
+ }
+
+ /**
+ * Increment the reference count without any safety checks.
+ */
+ void increment() {
+ bufRefCnt.incrementAndGet();
+ }
+
+ @Override
+ public boolean release(int decrement) {
+ Preconditions.checkState(decrement >= 1, "ref count decrement should be greater than or equal to 1");
+ // decrement the ref count
+ final int refCnt = bufRefCnt.addAndGet(-decrement);
+ // the new ref count should be >= 0
+ Preconditions.checkState(refCnt >= 0, "ref count has gone negative");
+ if (refCnt == 0) {
+ // refcount of this reference manager has dropped to 0
+ // release the underlying memory
+ struct.release();
+ struct.close();
+ }
+ return refCnt == 0;
+ }
+
+ @Override
+ public void retain() {
+ retain(1);
+ }
+
+ @Override
+ public void retain(int increment) {
+ Preconditions.checkArgument(increment > 0, "retain(%s) argument is not positive", increment);
+ final int originalReferenceCount = bufRefCnt.getAndAdd(increment);
+ Preconditions.checkState(originalReferenceCount > 0, "retain called but memory was already released");
+ }
+
+ @Override
+ public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) {
+ retain();
+
+ ArrowBuf targetArrowBuf = this.deriveBuffer(srcBuffer, 0, srcBuffer.capacity());
+ targetArrowBuf.readerIndex(srcBuffer.readerIndex());
+ targetArrowBuf.writerIndex(srcBuffer.writerIndex());
+ return targetArrowBuf;
+ }
+
+ @Override
+ public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) {
+ final long derivedBufferAddress = sourceBuffer.memoryAddress() + index;
+ return new ArrowBuf(this, null, length, derivedBufferAddress);
+ }
+
+ @Override
+ public OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public BufferAllocator getAllocator() {
+ return null;
+ }
+
+ @Override
+ public long getSize() {
+ return 0L;
+ }
+
+ @Override
+ public long getAccountedSize() {
+ return 0L;
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/Data.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/Data.java
new file mode 100644
index 000000000..27b0ce4bf
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/Data.java
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.StructVectorLoader;
+import org.apache.arrow.vector.StructVectorUnloader;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Functions for working with the C data interface.
+ * <p>
+ * This API is EXPERIMENTAL. Note that currently only 64bit systems are
+ * supported.
+ */
+public final class Data {
+
+ private Data() {
+ }
+
+ /**
+ * Export Java Field using the C data interface format.
+ *
+ * @param allocator Buffer allocator for allocating C data interface fields
+ * @param field Field object to export
+ * @param provider Dictionary provider for dictionary encoded fields (optional)
+ * @param out C struct where to export the field
+ */
+ public static void exportField(BufferAllocator allocator, Field field, DictionaryProvider provider, ArrowSchema out) {
+ SchemaExporter exporter = new SchemaExporter(allocator);
+ exporter.export(out, field, provider);
+ }
+
+ /**
+ * Export Java Schema using the C data interface format.
+ *
+ * @param allocator Buffer allocator for allocating C data interface fields
+ * @param schema Schema object to export
+ * @param provider Dictionary provider for dictionary encoded fields (optional)
+ * @param out C struct where to export the field
+ */
+ public static void exportSchema(BufferAllocator allocator, Schema schema, DictionaryProvider provider,
+ ArrowSchema out) {
+ // Convert to a struct field equivalent to the input schema
+ FieldType fieldType = new FieldType(false, new ArrowType.Struct(), null, schema.getCustomMetadata());
+ Field field = new Field("", fieldType, schema.getFields());
+ exportField(allocator, field, provider, out);
+ }
+
+ /**
+ * Export Java FieldVector using the C data interface format.
+ * <p>
+ * The resulting ArrowArray struct keeps the array data and buffers alive until
+ * its release callback is called by the consumer.
+ *
+ * @param allocator Buffer allocator for allocating C data interface fields
+ * @param vector Vector object to export
+ * @param provider Dictionary provider for dictionary encoded vectors
+ * (optional)
+ * @param out C struct where to export the array
+ */
+ public static void exportVector(BufferAllocator allocator, FieldVector vector, DictionaryProvider provider,
+ ArrowArray out) {
+ exportVector(allocator, vector, provider, out, null);
+ }
+
+ /**
+ * Export Java FieldVector using the C data interface format.
+ * <p>
+ * The resulting ArrowArray struct keeps the array data and buffers alive until
+ * its release callback is called by the consumer.
+ *
+ * @param allocator Buffer allocator for allocating C data interface fields
+ * @param vector Vector object to export
+ * @param provider Dictionary provider for dictionary encoded vectors
+ * (optional)
+ * @param out C struct where to export the array
+ * @param outSchema C struct where to export the array type (optional)
+ */
+ public static void exportVector(BufferAllocator allocator, FieldVector vector, DictionaryProvider provider,
+ ArrowArray out, ArrowSchema outSchema) {
+ if (outSchema != null) {
+ exportField(allocator, vector.getField(), provider, outSchema);
+ }
+
+ ArrayExporter exporter = new ArrayExporter(allocator);
+ exporter.export(out, vector, provider);
+ }
+
+ /**
+ * Export the current contents of a Java VectorSchemaRoot using the C data
+ * interface format.
+ * <p>
+ * The vector schema root is exported as if it were a struct array. The
+ * resulting ArrowArray struct keeps the record batch data and buffers alive
+ * until its release callback is called by the consumer.
+ *
+ * @param allocator Buffer allocator for allocating C data interface fields
+ * @param vsr Vector schema root to export
+ * @param provider Dictionary provider for dictionary encoded vectors
+ * (optional)
+ * @param out C struct where to export the record batch
+ */
+ public static void exportVectorSchemaRoot(BufferAllocator allocator, VectorSchemaRoot vsr,
+ DictionaryProvider provider, ArrowArray out) {
+ exportVectorSchemaRoot(allocator, vsr, provider, out, null);
+ }
+
+ /**
+ * Export the current contents of a Java VectorSchemaRoot using the C data
+ * interface format.
+ * <p>
+ * The vector schema root is exported as if it were a struct array. The
+ * resulting ArrowArray struct keeps the record batch data and buffers alive
+ * until its release callback is called by the consumer.
+ *
+ * @param allocator Buffer allocator for allocating C data interface fields
+ * @param vsr Vector schema root to export
+ * @param provider Dictionary provider for dictionary encoded vectors
+ * (optional)
+ * @param out C struct where to export the record batch
+ * @param outSchema C struct where to export the record batch schema (optional)
+ */
+ public static void exportVectorSchemaRoot(BufferAllocator allocator, VectorSchemaRoot vsr,
+ DictionaryProvider provider, ArrowArray out, ArrowSchema outSchema) {
+ if (outSchema != null) {
+ exportSchema(allocator, vsr.getSchema(), provider, outSchema);
+ }
+
+ VectorUnloader unloader = new VectorUnloader(vsr);
+ try (ArrowRecordBatch recordBatch = unloader.getRecordBatch()) {
+ StructVectorLoader loader = new StructVectorLoader(vsr.getSchema());
+ try (StructVector vector = loader.load(allocator, recordBatch)) {
+ exportVector(allocator, vector, provider, out);
+ }
+ }
+ }
+
+ /**
+ * Import Java Field from the C data interface.
+ * <p>
+ * The given ArrowSchema struct is released (as per the C data interface
+ * specification), even if this function fails.
+ *
+ * @param allocator Buffer allocator for allocating dictionary vectors
+ * @param schema C data interface struct representing the field [inout]
+ * @param provider A dictionary provider will be initialized with empty
+ * dictionary vectors (optional)
+ * @return Imported field object
+ */
+ public static Field importField(BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
+ try {
+ SchemaImporter importer = new SchemaImporter(allocator);
+ return importer.importField(schema, provider);
+ } finally {
+ schema.release();
+ schema.close();
+ }
+ }
+
+ /**
+ * Import Java Schema from the C data interface.
+ * <p>
+ * The given ArrowSchema struct is released (as per the C data interface
+ * specification), even if this function fails.
+ *
+ * @param allocator Buffer allocator for allocating dictionary vectors
+ * @param schema C data interface struct representing the field
+ * @param provider A dictionary provider will be initialized with empty
+ * dictionary vectors (optional)
+ * @return Imported schema object
+ */
+ public static Schema importSchema(BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
+ Field structField = importField(allocator, schema, provider);
+ if (structField.getType().getTypeID() != ArrowTypeID.Struct) {
+ throw new IllegalArgumentException("Cannot import schema: ArrowSchema describes non-struct type");
+ }
+ return new Schema(structField.getChildren(), structField.getMetadata());
+ }
+
+ /**
+ * Import Java vector from the C data interface.
+ * <p>
+ * The ArrowArray struct has its contents moved (as per the C data interface
+ * specification) to a private object held alive by the resulting array.
+ *
+ * @param allocator Buffer allocator
+ * @param array C data interface struct holding the array data
+ * @param vector Imported vector object [out]
+ * @param provider Dictionary provider to load dictionary vectors to (optional)
+ */
+ public static void importIntoVector(BufferAllocator allocator, ArrowArray array, FieldVector vector,
+ DictionaryProvider provider) {
+ ArrayImporter importer = new ArrayImporter(allocator, vector, provider);
+ importer.importArray(array);
+ }
+
+ /**
+ * Import Java vector and its type from the C data interface.
+ * <p>
+ * The ArrowArray struct has its contents moved (as per the C data interface
+ * specification) to a private object held alive by the resulting vector. The
+ * ArrowSchema struct is released, even if this function fails.
+ *
+ * @param allocator Buffer allocator for allocating the output FieldVector
+ * @param array C data interface struct holding the array data
+ * @param schema C data interface struct holding the array type
+ * @param provider Dictionary provider to load dictionary vectors to (optional)
+ * @return Imported vector object
+ */
+ public static FieldVector importVector(BufferAllocator allocator, ArrowArray array, ArrowSchema schema,
+ CDataDictionaryProvider provider) {
+ Field field = importField(allocator, schema, provider);
+ FieldVector vector = field.createVector(allocator);
+ importIntoVector(allocator, array, vector, provider);
+ return vector;
+ }
+
+ /**
+ * Import record batch from the C data interface into vector schema root.
+ *
+ * The ArrowArray struct has its contents moved (as per the C data interface
+ * specification) to a private object held alive by the resulting vector schema
+ * root.
+ *
+ * The schema of the vector schema root must match the input array (undefined
+ * behavior otherwise).
+ *
+ * @param allocator Buffer allocator
+ * @param array C data interface struct holding the record batch data
+ * @param root vector schema root to load into
+ * @param provider Dictionary provider to load dictionary vectors to (optional)
+ */
+ public static void importIntoVectorSchemaRoot(BufferAllocator allocator, ArrowArray array, VectorSchemaRoot root,
+ DictionaryProvider provider) {
+ try (StructVector structVector = StructVector.empty("", allocator)) {
+ structVector.initializeChildrenFromFields(root.getSchema().getFields());
+ importIntoVector(allocator, array, structVector, provider);
+ StructVectorUnloader unloader = new StructVectorUnloader(structVector);
+ VectorLoader loader = new VectorLoader(root);
+ try (ArrowRecordBatch recordBatch = unloader.getRecordBatch()) {
+ loader.load(recordBatch);
+ }
+ }
+ }
+
+ /**
+ * Import Java vector schema root from a C data interface Schema.
+ *
+ * The type represented by the ArrowSchema struct must be a struct type array.
+ *
+ * The ArrowSchema struct is released, even if this function fails.
+ *
+ * @param allocator Buffer allocator for allocating the output VectorSchemaRoot
+ * @param schema C data interface struct holding the record batch schema
+ * @param provider Dictionary provider to load dictionary vectors to (optional)
+ * @return Imported vector schema root
+ */
+ public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, ArrowSchema schema,
+ CDataDictionaryProvider provider) {
+ return importVectorSchemaRoot(allocator, null, schema, provider);
+ }
+
+ /**
+ * Import Java vector schema root from the C data interface.
+ *
+ * The type represented by the ArrowSchema struct must be a struct type array.
+ *
+ * The ArrowArray struct has its contents moved (as per the C data interface
+ * specification) to a private object held alive by the resulting record batch.
+ * The ArrowSchema struct is released, even if this function fails.
+ *
+ * Prefer {@link #importIntoVectorSchemaRoot} for loading array data while
+ * reusing the same vector schema root.
+ *
+ * @param allocator Buffer allocator for allocating the output VectorSchemaRoot
+ * @param array C data interface struct holding the record batch data
+ * (optional)
+ * @param schema C data interface struct holding the record batch schema
+ * @param provider Dictionary provider to load dictionary vectors to (optional)
+ * @return Imported vector schema root
+ */
+ public static VectorSchemaRoot importVectorSchemaRoot(BufferAllocator allocator, ArrowArray array, ArrowSchema schema,
+ CDataDictionaryProvider provider) {
+ VectorSchemaRoot vsr = VectorSchemaRoot.create(importSchema(allocator, schema, provider), allocator);
+ if (array != null) {
+ importIntoVectorSchemaRoot(allocator, array, vsr, provider);
+ }
+ return vsr;
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/Flags.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/Flags.java
new file mode 100644
index 000000000..744b4695a
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/Flags.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Flags as defined in the C data interface specification.
+ */
+final class Flags {
+ static final int ARROW_FLAG_DICTIONARY_ORDERED = 1;
+ static final int ARROW_FLAG_NULLABLE = 2;
+ static final int ARROW_FLAG_MAP_KEYS_SORTED = 4;
+
+ private Flags() {
+ }
+
+ static long forField(Field field) {
+ long flags = 0L;
+ if (field.isNullable()) {
+ flags |= ARROW_FLAG_NULLABLE;
+ }
+ if (field.getDictionary() != null && field.getDictionary().isOrdered()) {
+ flags |= ARROW_FLAG_DICTIONARY_ORDERED;
+ }
+ if (field.getType().getTypeID() == ArrowTypeID.Map) {
+ ArrowType.Map map = (ArrowType.Map) field.getType();
+ if (map.getKeysSorted()) {
+ flags |= ARROW_FLAG_MAP_KEYS_SORTED;
+ }
+ }
+ return flags;
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/Format.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/Format.java
new file mode 100644
index 000000000..315d3caad
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/Format.java
@@ -0,0 +1,340 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+
+/**
+ * Conversion between {@link ArrowType} and string formats, as per C data
+ * interface specification.
+ */
+final class Format {
+
+ private Format() {
+ }
+
+ static String asString(ArrowType arrowType) {
+ if (arrowType instanceof ExtensionType) {
+ ArrowType innerType = ((ExtensionType) arrowType).storageType();
+ return asString(innerType);
+ }
+
+ switch (arrowType.getTypeID()) {
+ case Binary:
+ return "z";
+ case Bool:
+ return "b";
+ case Date: {
+ ArrowType.Date type = (ArrowType.Date) arrowType;
+ switch (type.getUnit()) {
+ case DAY:
+ return "tdD";
+ case MILLISECOND:
+ return "tdm";
+ default:
+ throw new UnsupportedOperationException(
+ String.format("Date type with unit %s is unsupported", type.getUnit()));
+ }
+ }
+ case Decimal: {
+ ArrowType.Decimal type = (ArrowType.Decimal) arrowType;
+ if (type.getBitWidth() == 128) {
+ return String.format("d:%d,%d", type.getPrecision(), type.getScale());
+ }
+ return String.format("d:%d,%d,%d", type.getPrecision(), type.getScale(), type.getBitWidth());
+ }
+ case Duration: {
+ ArrowType.Duration type = (ArrowType.Duration) arrowType;
+ switch (type.getUnit()) {
+ case SECOND:
+ return "tDs";
+ case MILLISECOND:
+ return "tDm";
+ case MICROSECOND:
+ return "tDu";
+ case NANOSECOND:
+ return "tDn";
+ default:
+ throw new UnsupportedOperationException(
+ String.format("Duration type with unit %s is unsupported", type.getUnit()));
+ }
+ }
+ case FixedSizeBinary: {
+ ArrowType.FixedSizeBinary type = (ArrowType.FixedSizeBinary) arrowType;
+ return String.format("w:%d", type.getByteWidth());
+ }
+ case FixedSizeList: {
+ ArrowType.FixedSizeList type = (ArrowType.FixedSizeList) arrowType;
+ return String.format("+w:%d", type.getListSize());
+ }
+ case FloatingPoint: {
+ ArrowType.FloatingPoint type = (ArrowType.FloatingPoint) arrowType;
+ switch (type.getPrecision()) {
+ case HALF:
+ return "e";
+ case SINGLE:
+ return "f";
+ case DOUBLE:
+ return "g";
+ default:
+ throw new UnsupportedOperationException(
+ String.format("FloatingPoint type with precision %s is unsupported", type.getPrecision()));
+ }
+ }
+ case Int: {
+ String format;
+ ArrowType.Int type = (ArrowType.Int) arrowType;
+ switch (type.getBitWidth()) {
+ case Byte.SIZE:
+ format = "C";
+ break;
+ case Short.SIZE:
+ format = "S";
+ break;
+ case Integer.SIZE:
+ format = "I";
+ break;
+ case Long.SIZE:
+ format = "L";
+ break;
+ default:
+ throw new UnsupportedOperationException(
+ String.format("Int type with bitwidth %d is unsupported", type.getBitWidth()));
+ }
+ if (type.getIsSigned()) {
+ format = format.toLowerCase();
+ }
+ return format;
+ }
+ case Interval: {
+ ArrowType.Interval type = (ArrowType.Interval) arrowType;
+ switch (type.getUnit()) {
+ case DAY_TIME:
+ return "tiD";
+ case YEAR_MONTH:
+ return "tiM";
+ default:
+ throw new UnsupportedOperationException(
+ String.format("Interval type with unit %s is unsupported", type.getUnit()));
+ }
+ }
+ case LargeBinary:
+ return "Z";
+ case LargeList:
+ return "+L";
+ case LargeUtf8:
+ return "U";
+ case List:
+ return "+l";
+ case Map:
+ return "+m";
+ case Null:
+ return "n";
+ case Struct:
+ return "+s";
+ case Time: {
+ ArrowType.Time type = (ArrowType.Time) arrowType;
+ if (type.getUnit() == TimeUnit.SECOND && type.getBitWidth() == 32) {
+ return "tts";
+ } else if (type.getUnit() == TimeUnit.MILLISECOND && type.getBitWidth() == 32) {
+ return "ttm";
+ } else if (type.getUnit() == TimeUnit.MICROSECOND && type.getBitWidth() == 64) {
+ return "ttu";
+ } else if (type.getUnit() == TimeUnit.NANOSECOND && type.getBitWidth() == 64) {
+ return "ttn";
+ } else {
+ throw new UnsupportedOperationException(String.format("Time type with unit %s and bitwidth %d is unsupported",
+ type.getUnit(), type.getBitWidth()));
+ }
+ }
+ case Timestamp: {
+ String format;
+ ArrowType.Timestamp type = (ArrowType.Timestamp) arrowType;
+ switch (type.getUnit()) {
+ case SECOND:
+ format = "tss";
+ break;
+ case MILLISECOND:
+ format = "tsm";
+ break;
+ case MICROSECOND:
+ format = "tsu";
+ break;
+ case NANOSECOND:
+ format = "tsn";
+ break;
+ default:
+ throw new UnsupportedOperationException(
+ String.format("Timestamp type with unit %s is unsupported", type.getUnit()));
+ }
+ String timezone = type.getTimezone();
+ return String.format("%s:%s", format, timezone == null ? "" : timezone);
+ }
+ case Union:
+ ArrowType.Union type = (ArrowType.Union) arrowType;
+ String typeIDs = Arrays.stream(type.getTypeIds()).mapToObj(String::valueOf).collect(Collectors.joining(","));
+ switch (type.getMode()) {
+ case Dense:
+ return String.format("+ud:%s", typeIDs);
+ case Sparse:
+ return String.format("+us:%s", typeIDs);
+ default:
+ throw new UnsupportedOperationException(
+ String.format("Union type with mode %s is unsupported", type.getMode()));
+ }
+ case Utf8:
+ return "u";
+ case NONE:
+ throw new IllegalArgumentException("Arrow type ID is NONE");
+ default:
+ throw new UnsupportedOperationException(String.format("Unknown type id %s", arrowType.getTypeID()));
+ }
+ }
+
+ static ArrowType asType(String format, long flags)
+ throws NumberFormatException, UnsupportedOperationException, IllegalStateException {
+ switch (format) {
+ case "n":
+ return new ArrowType.Null();
+ case "b":
+ return new ArrowType.Bool();
+ case "c":
+ return new ArrowType.Int(8, true);
+ case "C":
+ return new ArrowType.Int(8, false);
+ case "s":
+ return new ArrowType.Int(16, true);
+ case "S":
+ return new ArrowType.Int(16, false);
+ case "i":
+ return new ArrowType.Int(32, true);
+ case "I":
+ return new ArrowType.Int(32, false);
+ case "l":
+ return new ArrowType.Int(64, true);
+ case "L":
+ return new ArrowType.Int(64, false);
+ case "e":
+ return new ArrowType.FloatingPoint(FloatingPointPrecision.HALF);
+ case "f":
+ return new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
+ case "g":
+ return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
+ case "z":
+ return new ArrowType.Binary();
+ case "Z":
+ return new ArrowType.LargeBinary();
+ case "u":
+ return new ArrowType.Utf8();
+ case "U":
+ return new ArrowType.LargeUtf8();
+ case "tdD":
+ return new ArrowType.Date(DateUnit.DAY);
+ case "tdm":
+ return new ArrowType.Date(DateUnit.MILLISECOND);
+ case "tts":
+ return new ArrowType.Time(TimeUnit.SECOND, 32);
+ case "ttm":
+ return new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ case "ttu":
+ return new ArrowType.Time(TimeUnit.MICROSECOND, 64);
+ case "ttn":
+ return new ArrowType.Time(TimeUnit.NANOSECOND, 64);
+ case "tDs":
+ return new ArrowType.Duration(TimeUnit.SECOND);
+ case "tDm":
+ return new ArrowType.Duration(TimeUnit.MILLISECOND);
+ case "tDu":
+ return new ArrowType.Duration(TimeUnit.MICROSECOND);
+ case "tDn":
+ return new ArrowType.Duration(TimeUnit.NANOSECOND);
+ case "tiM":
+ return new ArrowType.Interval(IntervalUnit.YEAR_MONTH);
+ case "tiD":
+ return new ArrowType.Interval(IntervalUnit.DAY_TIME);
+ case "+l":
+ return new ArrowType.List();
+ case "+L":
+ return new ArrowType.LargeList();
+ case "+s":
+ return new ArrowType.Struct();
+ case "+m":
+ boolean keysSorted = (flags & Flags.ARROW_FLAG_MAP_KEYS_SORTED) != 0;
+ return new ArrowType.Map(keysSorted);
+ default:
+ String[] parts = format.split(":", 2);
+ if (parts.length == 2) {
+ return parseComplexFormat(parts[0], parts[1]);
+ }
+ throw new UnsupportedOperationException(String.format("Format %s is not supported", format));
+ }
+ }
+
+ private static ArrowType parseComplexFormat(String format, String payload)
+ throws NumberFormatException, UnsupportedOperationException, IllegalStateException {
+ switch (format) {
+ case "d": {
+ int[] parts = payloadToIntArray(payload);
+ Preconditions.checkState(parts.length == 2 || parts.length == 3, "Format %s:%s is illegal", format, payload);
+ int precision = parts[0];
+ int scale = parts[1];
+ Integer bitWidth = (parts.length == 3) ? parts[2] : null;
+ return ArrowType.Decimal.createDecimal(precision, scale, bitWidth);
+ }
+ case "w":
+ return new ArrowType.FixedSizeBinary(Integer.parseInt(payload));
+ case "+w":
+ return new ArrowType.FixedSizeList(Integer.parseInt(payload));
+ case "+ud":
+ return new ArrowType.Union(UnionMode.Dense, payloadToIntArray(payload));
+ case "+us":
+ return new ArrowType.Union(UnionMode.Sparse, payloadToIntArray(payload));
+ case "tss":
+ return new ArrowType.Timestamp(TimeUnit.SECOND, payloadToTimezone(payload));
+ case "tsm":
+ return new ArrowType.Timestamp(TimeUnit.MILLISECOND, payloadToTimezone(payload));
+ case "tsu":
+ return new ArrowType.Timestamp(TimeUnit.MICROSECOND, payloadToTimezone(payload));
+ case "tsn":
+ return new ArrowType.Timestamp(TimeUnit.NANOSECOND, payloadToTimezone(payload));
+ default:
+ throw new UnsupportedOperationException(String.format("Format %s:%s is not supported", format, payload));
+ }
+ }
+
+ private static int[] payloadToIntArray(String payload) throws NumberFormatException {
+ return Arrays.stream(payload.split(",")).mapToInt(Integer::parseInt).toArray();
+ }
+
+ private static String payloadToTimezone(String payload) {
+ if (payload.isEmpty()) {
+ return null;
+ }
+ return payload;
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/Metadata.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/Metadata.java
new file mode 100644
index 000000000..b81b24fe4
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/Metadata.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.apache.arrow.c.NativeUtil.NULL;
+import static org.apache.arrow.util.Preconditions.checkState;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.MemoryUtil;
+
+/**
+ * Encode and decode metadata.
+ */
+final class Metadata {
+
+ private Metadata() {
+ }
+
+ static ArrowBuf encode(BufferAllocator allocator, Map<String, String> metadata) {
+ if (metadata == null || metadata.size() == 0) {
+ return null;
+ }
+
+ List<byte[]> buffers = new ArrayList<>(metadata.size() * 2);
+ int totalSize = 4 + metadata.size() * 8; // number of key/value pairs + buffer length fields
+ for (Map.Entry<String, String> entry : metadata.entrySet()) {
+ byte[] keyBuffer = entry.getKey().getBytes(StandardCharsets.UTF_8);
+ byte[] valueBuffer = entry.getValue().getBytes(StandardCharsets.UTF_8);
+ totalSize += keyBuffer.length;
+ totalSize += valueBuffer.length;
+ buffers.add(keyBuffer);
+ buffers.add(valueBuffer);
+ }
+
+ ArrowBuf result = allocator.buffer(totalSize);
+ ByteBuffer writer = MemoryUtil.directBuffer(result.memoryAddress(), totalSize).order(ByteOrder.nativeOrder());
+ writer.putInt(metadata.size());
+ for (byte[] buffer : buffers) {
+ writer.putInt(buffer.length);
+ writer.put(buffer);
+ }
+ return result.slice(0, totalSize);
+ }
+
+ static Map<String, String> decode(long bufferAddress) {
+ if (bufferAddress == NULL) {
+ return null;
+ }
+
+ ByteBuffer reader = MemoryUtil.directBuffer(bufferAddress, Integer.MAX_VALUE).order(ByteOrder.nativeOrder());
+
+ int size = reader.getInt();
+ checkState(size >= 0, "Metadata size must not be negative");
+ if (size == 0) {
+ return null;
+ }
+
+ Map<String, String> result = new HashMap<>(size);
+ for (int i = 0; i < size; i++) {
+ String key = readString(reader);
+ String value = readString(reader);
+ result.put(key, value);
+ }
+ return result;
+ }
+
+ private static String readString(ByteBuffer reader) {
+ int length = reader.getInt();
+ checkState(length >= 0, "Metadata item length must not be negative");
+ String result = "";
+ if (length > 0) {
+ byte[] dst = new byte[length];
+ reader.get(dst);
+ result = new String(dst, StandardCharsets.UTF_8);
+ }
+ return result;
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java
new file mode 100644
index 000000000..e2feda1e5
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.MemoryUtil;
+
+/**
+ * Utility functions for working with native memory.
+ */
+public final class NativeUtil {
+ public static final byte NULL = 0;
+ static final int MAX_STRING_LENGTH = Short.MAX_VALUE;
+
+ private NativeUtil() {
+ }
+
+ /**
+ * Convert a pointer to a null terminated string into a Java String.
+ *
+ * @param cstringPtr pointer to C string
+ * @return Converted string
+ */
+ public static String toJavaString(long cstringPtr) {
+ if (cstringPtr == NULL) {
+ return null;
+ }
+ ByteBuffer reader = MemoryUtil.directBuffer(cstringPtr, MAX_STRING_LENGTH).order(ByteOrder.nativeOrder());
+
+ int length = 0;
+ while (reader.get() != NULL) {
+ length++;
+ }
+ byte[] bytes = new byte[length];
+ ((ByteBuffer) reader.rewind()).get(bytes);
+ return new String(bytes, 0, length, StandardCharsets.UTF_8);
+ }
+
+ /**
+ * Convert a native array pointer (void**) to Java array of pointers.
+ *
+ * @param arrayPtr Array pointer
+ * @param size Array size
+ * @return Array of pointer values as longs
+ */
+ public static long[] toJavaArray(long arrayPtr, int size) {
+ if (arrayPtr == NULL) {
+ return null;
+ }
+ if (size < 0) {
+ throw new IllegalArgumentException("Invalid native array size");
+ }
+
+ long[] result = new long[size];
+ ByteBuffer reader = MemoryUtil.directBuffer(arrayPtr, Long.BYTES * size).order(ByteOrder.nativeOrder());
+ for (int i = 0; i < size; i++) {
+ result[i] = reader.getLong();
+ }
+ return result;
+ }
+
+ /**
+ * Convert Java string to a null terminated string.
+ *
+ * @param allocator Buffer allocator for allocating the native string
+ * @param string Input String to convert
+ * @return Buffer with a null terminated string or null if the input is null
+ */
+ public static ArrowBuf toNativeString(BufferAllocator allocator, String string) {
+ if (string == null) {
+ return null;
+ }
+
+ byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
+ ArrowBuf buffer = allocator.buffer(bytes.length + 1);
+ buffer.writeBytes(bytes);
+ buffer.writeByte(NULL);
+ return buffer;
+ }
+
+ /**
+ * Close a buffer if it's not null.
+ *
+ * @param buf Buffer to close
+ */
+ public static void closeBuffer(ArrowBuf buf) {
+ if (buf != null) {
+ buf.close();
+ }
+ }
+
+ /**
+ * Get the address of a buffer or {@value #NULL} if the input buffer is null.
+ *
+ * @param buf Buffer to get the address of
+ * @return Memory addresss or {@value #NULL}
+ */
+ public static long addressOrNull(ArrowBuf buf) {
+ if (buf == null) {
+ return NULL;
+ }
+ return buf.memoryAddress();
+ }
+
+ /**
+ * Get the address of a C Data Interface struct or {@value #NULL} if the input
+ * struct is null.
+ *
+ * @param struct C Data Interface struct to get the address of
+ * @return Memory addresss or {@value #NULL}
+ */
+ public static long addressOrNull(BaseStruct struct) {
+ if (struct == null) {
+ return NULL;
+ }
+ return struct.memoryAddress();
+ }
+
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java
new file mode 100644
index 000000000..04d41a4e4
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.apache.arrow.c.NativeUtil.NULL;
+import static org.apache.arrow.c.NativeUtil.addressOrNull;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.c.jni.JniWrapper;
+import org.apache.arrow.c.jni.PrivateData;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Exporter for {@link ArrowSchema}.
+ */
+final class SchemaExporter {
+ private final BufferAllocator allocator;
+
+ public SchemaExporter(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ /**
+ * Private data structure for exported schemas.
+ */
+ static class ExportedSchemaPrivateData implements PrivateData {
+ ArrowBuf format;
+ ArrowBuf name;
+ ArrowBuf metadata;
+ ArrowBuf children_ptrs;
+ ArrowSchema dictionary;
+ List<ArrowSchema> children;
+
+ @Override
+ public void close() {
+ NativeUtil.closeBuffer(format);
+ NativeUtil.closeBuffer(name);
+ NativeUtil.closeBuffer(metadata);
+ NativeUtil.closeBuffer(children_ptrs);
+ if (dictionary != null) {
+ dictionary.close();
+ }
+ if (children != null) {
+ for (ArrowSchema child : children) {
+ child.close();
+ }
+ }
+ }
+ }
+
+ void export(ArrowSchema schema, Field field, DictionaryProvider dictionaryProvider) {
+ String name = field.getName();
+ String format = Format.asString(field.getType());
+ long flags = Flags.forField(field);
+ List<Field> children = field.getChildren();
+ DictionaryEncoding dictionaryEncoding = field.getDictionary();
+
+ ExportedSchemaPrivateData data = new ExportedSchemaPrivateData();
+ try {
+ data.format = NativeUtil.toNativeString(allocator, format);
+ data.name = NativeUtil.toNativeString(allocator, name);
+ data.metadata = Metadata.encode(allocator, field.getMetadata());
+
+ if (children != null) {
+ data.children = new ArrayList<>(children.size());
+ data.children_ptrs = allocator.buffer((long) children.size() * Long.BYTES);
+ for (int i = 0; i < children.size(); i++) {
+ ArrowSchema child = ArrowSchema.allocateNew(allocator);
+ data.children.add(child);
+ data.children_ptrs.writeLong(child.memoryAddress());
+ }
+ }
+
+ if (dictionaryEncoding != null) {
+ Dictionary dictionary = dictionaryProvider.lookup(dictionaryEncoding.getId());
+ checkNotNull(dictionary, "Dictionary lookup failed on export of field with dictionary");
+
+ data.dictionary = ArrowSchema.allocateNew(allocator);
+ export(data.dictionary, dictionary.getVector().getField(), dictionaryProvider);
+ }
+
+ ArrowSchema.Snapshot snapshot = new ArrowSchema.Snapshot();
+ snapshot.format = data.format.memoryAddress();
+ snapshot.name = addressOrNull(data.name);
+ snapshot.metadata = addressOrNull(data.metadata);
+ snapshot.flags = flags;
+ snapshot.n_children = (data.children != null) ? data.children.size() : 0;
+ snapshot.children = addressOrNull(data.children_ptrs);
+ snapshot.dictionary = addressOrNull(data.dictionary);
+ snapshot.release = NULL;
+ schema.save(snapshot);
+
+ // sets release and private data
+ JniWrapper.get().exportSchema(schema.memoryAddress(), data);
+ } catch (Exception e) {
+ data.close();
+ throw e;
+ }
+
+ // Export children
+ if (children != null) {
+ for (int i = 0; i < children.size(); i++) {
+ Field childField = children.get(i);
+ ArrowSchema child = data.children.get(i);
+ export(child, childField, dictionaryProvider);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java
new file mode 100644
index 000000000..21d88f6cd
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.apache.arrow.c.NativeUtil.NULL;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+import static org.apache.arrow.util.Preconditions.checkState;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Importer for {@link ArrowSchema}.
+ */
+final class SchemaImporter {
+ private static final Logger logger = LoggerFactory.getLogger(SchemaImporter.class);
+
+ private static final int MAX_IMPORT_RECURSION_LEVEL = 64;
+ private long nextDictionaryID = 1L;
+
+ private final BufferAllocator allocator;
+
+ public SchemaImporter(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ Field importField(ArrowSchema schema, CDataDictionaryProvider provider) {
+ return importField(schema, provider, 0);
+ }
+
+ private Field importField(ArrowSchema schema, CDataDictionaryProvider provider, int recursionLevel) {
+ checkState(recursionLevel <= MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowSchema struct exceeded");
+
+ ArrowSchema.Snapshot snapshot = schema.snapshot();
+ checkState(snapshot.release != NULL, "Cannot import released ArrowSchema");
+
+ String name = NativeUtil.toJavaString(snapshot.name);
+ String format = NativeUtil.toJavaString(snapshot.format);
+ checkNotNull(format, "format field must not be null");
+ ArrowType arrowType = Format.asType(format, snapshot.flags);
+ boolean nullable = (snapshot.flags & Flags.ARROW_FLAG_NULLABLE) != 0;
+ Map<String, String> metadata = Metadata.decode(snapshot.metadata);
+
+ if (metadata != null && metadata.containsKey(ExtensionType.EXTENSION_METADATA_KEY_NAME)) {
+ final String extensionName = metadata.get(ExtensionType.EXTENSION_METADATA_KEY_NAME);
+ final String extensionMetadata = metadata.getOrDefault(ExtensionType.EXTENSION_METADATA_KEY_METADATA, "");
+ ExtensionType extensionType = ExtensionTypeRegistry.lookup(extensionName);
+ if (extensionType != null) {
+ arrowType = extensionType.deserialize(arrowType, extensionMetadata);
+ } else {
+ // Otherwise, we haven't registered the type
+ logger.info("Unrecognized extension type: {}", extensionName);
+ }
+ }
+
+ // Handle dictionary encoded vectors
+ DictionaryEncoding dictionaryEncoding = null;
+ if (snapshot.dictionary != NULL && provider != null) {
+ boolean ordered = (snapshot.flags & Flags.ARROW_FLAG_DICTIONARY_ORDERED) != 0;
+ ArrowType.Int indexType = (ArrowType.Int) arrowType;
+ dictionaryEncoding = new DictionaryEncoding(nextDictionaryID++, ordered, indexType);
+
+ ArrowSchema dictionarySchema = ArrowSchema.wrap(snapshot.dictionary);
+ Field dictionaryField = importField(dictionarySchema, provider, recursionLevel + 1);
+ provider.put(new Dictionary(dictionaryField.createVector(allocator), dictionaryEncoding));
+ }
+
+ FieldType fieldType = new FieldType(nullable, arrowType, dictionaryEncoding, metadata);
+
+ List<Field> children = null;
+ long[] childrenIds = NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children));
+ if (childrenIds != null && childrenIds.length > 0) {
+ children = new ArrayList<>(childrenIds.length);
+ for (long childAddress : childrenIds) {
+ ArrowSchema childSchema = ArrowSchema.wrap(childAddress);
+ Field field = importField(childSchema, provider, recursionLevel + 1);
+ children.add(field);
+ }
+ }
+ return new Field(name, fieldType, children);
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java
new file mode 100644
index 000000000..bd2008f05
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c.jni;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * The JniLoader for C Data Interface API's native implementation.
+ */
+public class JniLoader {
+ private static final JniLoader INSTANCE = new JniLoader(Collections.singletonList("arrow_cdata_jni"));
+
+ public static JniLoader get() {
+ return INSTANCE;
+ }
+
+ private final Set<String> librariesToLoad;
+
+ private JniLoader(List<String> libraryNames) {
+ librariesToLoad = new HashSet<>(libraryNames);
+ }
+
+ private boolean finished() {
+ return librariesToLoad.isEmpty();
+ }
+
+ /**
+ * If required JNI libraries are not loaded, then load them.
+ */
+ public void ensureLoaded() {
+ if (finished()) {
+ return;
+ }
+ loadRemaining();
+ }
+
+ private synchronized void loadRemaining() {
+ // The method is protected by a mutex via synchronized, if more than one thread
+ // race to call
+ // loadRemaining, at same time only one will do the actual loading and the
+ // others will wait for
+ // the mutex to be acquired then check on the remaining list: if there are
+ // libraries that were not
+ // successfully loaded then the mutex owner will try to load them again.
+ if (finished()) {
+ return;
+ }
+ List<String> libs = new ArrayList<>(librariesToLoad);
+ for (String lib : libs) {
+ load(lib);
+ librariesToLoad.remove(lib);
+ }
+ }
+
+ private void load(String name) {
+ final String libraryToLoad = System.mapLibraryName(name);
+ try {
+ File temp = File.createTempFile("jnilib-", ".tmp", new File(System.getProperty("java.io.tmpdir")));
+ try (final InputStream is = JniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) {
+ if (is == null) {
+ throw new FileNotFoundException(libraryToLoad);
+ }
+ Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ System.load(temp.getAbsolutePath());
+ }
+ } catch (IOException e) {
+ throw new IllegalStateException("error loading native libraries: " + e);
+ }
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java
new file mode 100644
index 000000000..04a143a7a
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c.jni;
+
+/**
+ * JniWrapper for C Data Interface API implementation.
+ */
+public class JniWrapper {
+ private static final JniWrapper INSTANCE = new JniWrapper();
+
+ public static JniWrapper get() {
+ return INSTANCE;
+ }
+
+ private JniWrapper() {
+ // A best effort to error on 32-bit systems
+ String dataModel = System.getProperty("sun.arch.data.model");
+ if (dataModel != null && dataModel.equals("32")) {
+ throw new UnsupportedOperationException(
+ "The Java C Data Interface implementation is currently only supported on 64-bit systems");
+ }
+ JniLoader.get().ensureLoaded();
+ }
+
+ public native void releaseSchema(long memoryAddress);
+
+ public native void releaseArray(long memoryAddress);
+
+ public native void exportSchema(long memoryAddress, PrivateData privateData);
+
+ public native void exportArray(long memoryAddress, PrivateData data);
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java b/src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java
new file mode 100644
index 000000000..e6336cc64
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c.jni;
+
+import java.io.Closeable;
+
+/**
+ * Interface for Java objects stored in C data interface private data.
+ * <p>
+ * This interface is used for exported structures.
+ */
+public interface PrivateData extends Closeable {
+
+ @Override
+ void close();
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java b/src/arrow/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java
new file mode 100644
index 000000000..eab7e491f
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.util.Preconditions.checkArgument;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Loads buffers into {@link StructVector}.
+ */
+public class StructVectorLoader {
+
+ private final Schema schema;
+ private final CompressionCodec.Factory factory;
+
+ /**
+ * A flag indicating if decompression is needed. This will affect the behavior
+ * of releasing buffers.
+ */
+ private boolean decompressionNeeded;
+
+ /**
+ * Construct with a schema.
+ *
+ * @param schema buffers are added based on schema.
+ */
+ public StructVectorLoader(Schema schema) {
+ this(schema, NoCompressionCodec.Factory.INSTANCE);
+ }
+
+ /**
+ * Construct with a schema and a compression codec factory.
+ *
+ * @param schema buffers are added based on schema.
+ * @param factory the factory to create codec.
+ */
+ public StructVectorLoader(Schema schema, CompressionCodec.Factory factory) {
+ this.schema = schema;
+ this.factory = factory;
+ }
+
+ /**
+ * Loads the record batch into the struct vector.
+ *
+ * <p>
+ * This will not close the record batch.
+ *
+ * @param recordBatch the batch to load
+ */
+ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch) {
+ StructVector result = StructVector.empty("", allocator);
+ result.initializeChildrenFromFields(this.schema.getFields());
+
+ Iterator<ArrowBuf> buffers = recordBatch.getBuffers().iterator();
+ Iterator<ArrowFieldNode> nodes = recordBatch.getNodes().iterator();
+ CompressionUtil.CodecType codecType = CompressionUtil.CodecType
+ .fromCompressionType(recordBatch.getBodyCompression().getCodec());
+ decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION;
+ CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE;
+ for (FieldVector fieldVector : result.getChildrenFromFields()) {
+ loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec);
+ }
+ result.loadFieldBuffers(new ArrowFieldNode(recordBatch.getLength(), 0), Collections.singletonList(null));
+ if (nodes.hasNext() || buffers.hasNext()) {
+ throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " +
+ Collections2.toList(nodes).toString() + " buffers: " + Collections2.toList(buffers).toString());
+ }
+ return result;
+ }
+
+ private void loadBuffers(FieldVector vector, Field field, Iterator<ArrowBuf> buffers, Iterator<ArrowFieldNode> nodes,
+ CompressionCodec codec) {
+ checkArgument(nodes.hasNext(), "no more field nodes for for field %s and vector %s", field, vector);
+ ArrowFieldNode fieldNode = nodes.next();
+ int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType());
+ List<ArrowBuf> ownBuffers = new ArrayList<>(bufferLayoutCount);
+ for (int j = 0; j < bufferLayoutCount; j++) {
+ ArrowBuf nextBuf = buffers.next();
+ // for vectors without nulls, the buffer is empty, so there is no need to
+ // decompress it.
+ ArrowBuf bufferToAdd = nextBuf.writerIndex() > 0 ? codec.decompress(vector.getAllocator(), nextBuf) : nextBuf;
+ ownBuffers.add(bufferToAdd);
+ if (decompressionNeeded) {
+ // decompression performed
+ nextBuf.getReferenceManager().retain();
+ }
+ }
+ try {
+ vector.loadFieldBuffers(fieldNode, ownBuffers);
+ if (decompressionNeeded) {
+ for (ArrowBuf buf : ownBuffers) {
+ buf.close();
+ }
+ }
+ } catch (RuntimeException e) {
+ throw new IllegalArgumentException(
+ "Could not load buffers for field " + field + ". error message: " + e.getMessage(), e);
+ }
+ List<Field> children = field.getChildren();
+ if (children.size() > 0) {
+ List<FieldVector> childrenFromFields = vector.getChildrenFromFields();
+ checkArgument(children.size() == childrenFromFields.size(),
+ "should have as many children as in the schema: found %s expected %s", childrenFromFields.size(),
+ children.size());
+ for (int i = 0; i < childrenFromFields.size(); i++) {
+ Field child = children.get(i);
+ FieldVector fieldVector = childrenFromFields.get(i);
+ loadBuffers(fieldVector, child, buffers, nodes, codec);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/c/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java b/src/arrow/java/c/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java
new file mode 100644
index 000000000..e75156cf2
--- /dev/null
+++ b/src/arrow/java/c/src/main/java/org/apache/arrow/vector/StructVectorUnloader.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+
+/**
+ * Helper class that handles converting a {@link StructVector} to a
+ * {@link ArrowRecordBatch}.
+ */
+public class StructVectorUnloader {
+
+ private final StructVector root;
+ private final boolean includeNullCount;
+ private final CompressionCodec codec;
+ private final boolean alignBuffers;
+
+ /**
+ * Constructs a new instance of the given struct vector.
+ */
+ public StructVectorUnloader(StructVector root) {
+ this(root, true, NoCompressionCodec.INSTANCE, true);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param root The struct vector to serialize to an
+ * {@link ArrowRecordBatch}.
+ * @param includeNullCount Controls whether null count is copied to the
+ * {@link ArrowRecordBatch}
+ * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries.
+ */
+ public StructVectorUnloader(StructVector root, boolean includeNullCount, boolean alignBuffers) {
+ this(root, includeNullCount, NoCompressionCodec.INSTANCE, alignBuffers);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param root The struct vector to serialize to an
+ * {@link ArrowRecordBatch}.
+ * @param includeNullCount Controls whether null count is copied to the
+ * {@link ArrowRecordBatch}
+ * @param codec the codec for compressing data. If it is null, then
+ * no compression is needed.
+ * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries.
+ */
+ public StructVectorUnloader(StructVector root, boolean includeNullCount, CompressionCodec codec,
+ boolean alignBuffers) {
+ this.root = root;
+ this.includeNullCount = includeNullCount;
+ this.codec = codec;
+ this.alignBuffers = alignBuffers;
+ }
+
+ /**
+ * Performs the depth first traversal of the Vectors to create an
+ * {@link ArrowRecordBatch} suitable for serialization.
+ */
+ public ArrowRecordBatch getRecordBatch() {
+ List<ArrowFieldNode> nodes = new ArrayList<>();
+ List<ArrowBuf> buffers = new ArrayList<>();
+ for (FieldVector vector : root.getChildrenFromFields()) {
+ appendNodes(vector, nodes, buffers);
+ }
+ return new ArrowRecordBatch(root.getValueCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec),
+ alignBuffers);
+ }
+
+ private void appendNodes(FieldVector vector, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
+ nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1));
+ List<ArrowBuf> fieldBuffers = vector.getFieldBuffers();
+ int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType());
+ if (fieldBuffers.size() != expectedBufferCount) {
+ throw new IllegalArgumentException(String.format("wrong number of buffers for field %s in vector %s. found: %s",
+ vector.getField(), vector.getClass().getSimpleName(), fieldBuffers));
+ }
+ for (ArrowBuf buf : fieldBuffers) {
+ buffers.add(codec.compress(vector.getAllocator(), buf));
+ }
+ for (FieldVector child : vector.getChildrenFromFields()) {
+ appendNodes(child, nodes, buffers);
+ }
+ }
+}
diff --git a/src/arrow/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java b/src/arrow/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java
new file mode 100644
index 000000000..3f793f836
--- /dev/null
+++ b/src/arrow/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+import java.util.Collections;
+
+import org.apache.arrow.c.ArrowArray;
+import org.apache.arrow.c.ArrowSchema;
+import org.apache.arrow.c.CDataDictionaryProvider;
+import org.apache.arrow.c.Data;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+public class DictionaryTest {
+ private RootAllocator allocator = null;
+
+ @BeforeEach
+ public void setUp() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @AfterEach
+ public void tearDown() {
+ allocator.close();
+ }
+
+ void roundtrip(FieldVector vector, DictionaryProvider provider, Class<?> clazz) {
+ // Consumer allocates empty structures
+ try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator);
+ ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) {
+
+ // Producer creates structures from existing memory pointers
+ try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress());
+ ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) {
+ // Producer exports vector into the C Data Interface structures
+ Data.exportVector(allocator, vector, provider, arrowArray, arrowSchema);
+ }
+
+ // Consumer imports vector
+ try (CDataDictionaryProvider cDictionaryProvider = new CDataDictionaryProvider();
+ FieldVector imported = Data.importVector(allocator, consumerArrowArray, consumerArrowSchema,
+ cDictionaryProvider);) {
+ assertTrue(clazz.isInstance(imported), String.format("expected %s but was %s", clazz, imported.getClass()));
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported), "vectors are not equivalent");
+ for (long id : cDictionaryProvider.getDictionaryIds()) {
+ ValueVector exportedDictionaryVector = provider.lookup(id).getVector();
+ ValueVector importedDictionaryVector = cDictionaryProvider.lookup(id).getVector();
+ assertTrue(VectorEqualsVisitor.vectorEquals(exportedDictionaryVector, importedDictionaryVector),
+ String.format("Dictionary vectors for ID %d are not equivalent", id));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testWithDictionary() throws Exception {
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ // create dictionary and provider
+ final VarCharVector dictVector = new VarCharVector("dict", allocator);
+ dictVector.allocateNewSafe();
+ dictVector.setSafe(0, "aa".getBytes());
+ dictVector.setSafe(1, "bb".getBytes());
+ dictVector.setSafe(2, "cc".getBytes());
+ dictVector.setValueCount(3);
+
+ Dictionary dictionary = new Dictionary(dictVector, new DictionaryEncoding(1L, false, /* indexType= */null));
+ provider.put(dictionary);
+
+ // create vector and encode it
+ final VarCharVector vector = new VarCharVector("vector", allocator);
+ vector.allocateNewSafe();
+ vector.setSafe(0, "bb".getBytes());
+ vector.setSafe(1, "bb".getBytes());
+ vector.setSafe(2, "cc".getBytes());
+ vector.setSafe(3, "aa".getBytes());
+ vector.setValueCount(4);
+
+ // get the encoded vector
+ IntVector encodedVector = (IntVector) DictionaryEncoder.encode(vector, dictionary);
+
+ // Perform roundtrip using C Data Interface
+ roundtrip(encodedVector, provider, IntVector.class);
+
+ // Close all
+ AutoCloseables.close((AutoCloseable) vector, encodedVector, dictVector);
+ }
+
+ @Test
+ public void testRoundtripMultipleBatches() throws IOException {
+ try (ArrowStreamReader reader = createMultiBatchReader();
+ ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator)) {
+ // Load first batch
+ reader.loadNextBatch();
+ // Producer fills consumer schema stucture
+ Data.exportSchema(allocator, reader.getVectorSchemaRoot().getSchema(), reader, consumerArrowSchema);
+ // Consumer loads it as an empty vector schema root
+ try (CDataDictionaryProvider consumerDictionaryProvider = new CDataDictionaryProvider();
+ VectorSchemaRoot consumerRoot = Data.importVectorSchemaRoot(allocator, consumerArrowSchema,
+ consumerDictionaryProvider)) {
+ do {
+ try (ArrowArray consumerArray = ArrowArray.allocateNew(allocator)) {
+ // Producer exports next data
+ Data.exportVectorSchemaRoot(allocator, reader.getVectorSchemaRoot(), reader, consumerArray);
+ // Consumer loads next data
+ Data.importIntoVectorSchemaRoot(allocator, consumerArray, consumerRoot, consumerDictionaryProvider);
+
+ // Roundtrip validation
+ assertTrue(consumerRoot.equals(reader.getVectorSchemaRoot()), "vector schema roots are not equivalent");
+ for (long id : consumerDictionaryProvider.getDictionaryIds()) {
+ ValueVector exportedDictionaryVector = reader.lookup(id).getVector();
+ ValueVector importedDictionaryVector = consumerDictionaryProvider.lookup(id).getVector();
+ assertTrue(VectorEqualsVisitor.vectorEquals(exportedDictionaryVector, importedDictionaryVector),
+ String.format("Dictionary vectors for ID %d are not equivalent", id));
+ }
+ }
+ }
+ while (reader.loadNextBatch());
+ }
+ }
+ }
+
+ private ArrowStreamReader createMultiBatchReader() throws IOException {
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+ try (final VarCharVector dictVector = new VarCharVector("dict", allocator);
+ IntVector vector = new IntVector("foo", allocator)) {
+ // create dictionary and provider
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ dictVector.allocateNewSafe();
+ dictVector.setSafe(0, "aa".getBytes());
+ dictVector.setSafe(1, "bb".getBytes());
+ dictVector.setSafe(2, "cc".getBytes());
+ dictVector.setSafe(3, "dd".getBytes());
+ dictVector.setSafe(4, "ee".getBytes());
+ dictVector.setValueCount(5);
+ Dictionary dictionary = new Dictionary(dictVector, new DictionaryEncoding(1L, false, /* indexType= */null));
+ provider.put(dictionary);
+
+ Schema schema = new Schema(Collections.singletonList(vector.getField()));
+ try (
+ VectorSchemaRoot root = new VectorSchemaRoot(schema, Collections.singletonList(vector),
+ vector.getValueCount());
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, Channels.newChannel(os));) {
+
+ writer.start();
+
+ // Batch 1
+ vector.setNull(0);
+ vector.setSafe(1, 1);
+ vector.setSafe(2, 2);
+ vector.setNull(3);
+ vector.setSafe(4, 1);
+ vector.setValueCount(5);
+ root.setRowCount(5);
+ writer.writeBatch();
+
+ // Batch 2
+ vector.setNull(0);
+ vector.setSafe(1, 1);
+ vector.setSafe(2, 2);
+ vector.setValueCount(3);
+ root.setRowCount(3);
+ writer.writeBatch();
+
+ // Batch 3
+ vector.setSafe(0, 0);
+ vector.setSafe(1, 1);
+ vector.setSafe(2, 2);
+ vector.setSafe(3, 3);
+ vector.setSafe(4, 4);
+ vector.setValueCount(5);
+ root.setRowCount(5);
+ writer.writeBatch();
+
+ writer.end();
+ }
+ }
+
+ ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray());
+ return new ArrowStreamReader(in, allocator);
+ }
+
+}
diff --git a/src/arrow/java/c/src/test/java/org/apache/arrow/c/FlagsTest.java b/src/arrow/java/c/src/test/java/org/apache/arrow/c/FlagsTest.java
new file mode 100644
index 000000000..35f836f71
--- /dev/null
+++ b/src/arrow/java/c/src/test/java/org/apache/arrow/c/FlagsTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+
+import org.apache.arrow.c.Flags;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.jupiter.api.Test;
+
+public class FlagsTest {
+ @Test
+ public void testForFieldNullableOrderedDict() {
+ FieldType fieldType = new FieldType(true, ArrowType.Binary.INSTANCE,
+ new DictionaryEncoding(123L, true, new ArrowType.Int(8, true)));
+
+ assertEquals(Flags.ARROW_FLAG_DICTIONARY_ORDERED | Flags.ARROW_FLAG_NULLABLE,
+ Flags.forField(new Field("Name", fieldType, new ArrayList<>())));
+ }
+
+ @Test
+ public void testForFieldOrderedDict() {
+ FieldType fieldType = new FieldType(false, ArrowType.Binary.INSTANCE,
+ new DictionaryEncoding(123L, true, new ArrowType.Int(8, true)));
+ assertEquals(Flags.ARROW_FLAG_DICTIONARY_ORDERED, Flags.forField(new Field("Name", fieldType, new ArrayList<>())));
+ }
+
+ @Test
+ public void testForFieldNullableDict() {
+ FieldType fieldType = new FieldType(true, ArrowType.Binary.INSTANCE,
+ new DictionaryEncoding(123L, false, new ArrowType.Int(8, true)));
+ assertEquals(Flags.ARROW_FLAG_NULLABLE, Flags.forField(new Field("Name", fieldType, new ArrayList<>())));
+ }
+
+ @Test
+ public void testForFieldNullable() {
+ FieldType fieldType = new FieldType(true, ArrowType.Binary.INSTANCE, null);
+ assertEquals(Flags.ARROW_FLAG_NULLABLE, Flags.forField(new Field("Name", fieldType, new ArrayList<>())));
+ }
+
+ @Test
+ public void testForFieldNullableOrderedSortedMap() {
+ ArrowType.Map type = new ArrowType.Map(true);
+ FieldType fieldType = new FieldType(true, type, new DictionaryEncoding(123L, true, new ArrowType.Int(8, true)));
+ assertEquals(Flags.ARROW_FLAG_DICTIONARY_ORDERED | Flags.ARROW_FLAG_NULLABLE | Flags.ARROW_FLAG_MAP_KEYS_SORTED,
+ Flags.forField(new Field("Name", fieldType, new ArrayList<>())));
+ }
+
+ @Test
+ public void testForFieldNullableOrderedMap() {
+ ArrowType.Map type = new ArrowType.Map(false);
+ FieldType fieldType = new FieldType(true, type, new DictionaryEncoding(123L, true, new ArrowType.Int(8, true)));
+ assertEquals(Flags.ARROW_FLAG_DICTIONARY_ORDERED | Flags.ARROW_FLAG_NULLABLE,
+ Flags.forField(new Field("Name", fieldType, new ArrayList<>())));
+ }
+}
diff --git a/src/arrow/java/c/src/test/java/org/apache/arrow/c/FormatTest.java b/src/arrow/java/c/src/test/java/org/apache/arrow/c/FormatTest.java
new file mode 100644
index 000000000..1f7f86b36
--- /dev/null
+++ b/src/arrow/java/c/src/test/java/org/apache/arrow/c/FormatTest.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.apache.arrow.c.Flags;
+import org.apache.arrow.c.Format;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.jupiter.api.Test;
+
+public class FormatTest {
+ @Test
+ public void testAsString() {
+ assertEquals("z", Format.asString(new ArrowType.Binary()));
+ assertEquals("b", Format.asString(new ArrowType.Bool()));
+ assertEquals("tdD", Format.asString(new ArrowType.Date(DateUnit.DAY)));
+ assertEquals("tdm", Format.asString(new ArrowType.Date(DateUnit.MILLISECOND)));
+ assertEquals("d:1,1", Format.asString(new ArrowType.Decimal(1, 1, 128)));
+ assertEquals("d:1,1,1", Format.asString(new ArrowType.Decimal(1, 1, 1)));
+ assertEquals("d:9,1,1", Format.asString(new ArrowType.Decimal(9, 1, 1)));
+ assertEquals("tDs", Format.asString(new ArrowType.Duration(TimeUnit.SECOND)));
+ assertEquals("tDm", Format.asString(new ArrowType.Duration(TimeUnit.MILLISECOND)));
+ assertEquals("tDu", Format.asString(new ArrowType.Duration(TimeUnit.MICROSECOND)));
+ assertEquals("tDn", Format.asString(new ArrowType.Duration(TimeUnit.NANOSECOND)));
+ assertEquals("w:1", Format.asString(new ArrowType.FixedSizeBinary(1)));
+ assertEquals("+w:3", Format.asString(new ArrowType.FixedSizeList(3)));
+ assertEquals("e", Format.asString(new ArrowType.FloatingPoint(FloatingPointPrecision.HALF)));
+ assertEquals("f", Format.asString(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)));
+ assertEquals("g", Format.asString(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)));
+ assertEquals("c", Format.asString(new ArrowType.Int(Byte.SIZE, true)));
+ assertEquals("C", Format.asString(new ArrowType.Int(Byte.SIZE, false)));
+ assertEquals("s", Format.asString(new ArrowType.Int(Short.SIZE, true)));
+ assertEquals("S", Format.asString(new ArrowType.Int(Short.SIZE, false)));
+ assertEquals("i", Format.asString(new ArrowType.Int(Integer.SIZE, true)));
+ assertEquals("I", Format.asString(new ArrowType.Int(Integer.SIZE, false)));
+ assertEquals("l", Format.asString(new ArrowType.Int(Long.SIZE, true)));
+ assertEquals("L", Format.asString(new ArrowType.Int(Long.SIZE, false)));
+ assertEquals("tiD", Format.asString(new ArrowType.Interval(IntervalUnit.DAY_TIME)));
+ assertEquals("tiM", Format.asString(new ArrowType.Interval(IntervalUnit.YEAR_MONTH)));
+ assertEquals("Z", Format.asString(new ArrowType.LargeBinary()));
+ assertEquals("+L", Format.asString(new ArrowType.LargeList()));
+ assertEquals("U", Format.asString(new ArrowType.LargeUtf8()));
+ assertEquals("+l", Format.asString(new ArrowType.List()));
+ assertEquals("+m", Format.asString(new ArrowType.Map(true)));
+ assertEquals("n", Format.asString(new ArrowType.Null()));
+ assertEquals("+s", Format.asString(new ArrowType.Struct()));
+ assertEquals("tts", Format.asString(new ArrowType.Time(TimeUnit.SECOND, 32)));
+ assertEquals("ttm", Format.asString(new ArrowType.Time(TimeUnit.MILLISECOND, 32)));
+ assertEquals("ttu", Format.asString(new ArrowType.Time(TimeUnit.MICROSECOND, 64)));
+ assertEquals("ttn", Format.asString(new ArrowType.Time(TimeUnit.NANOSECOND, 64)));
+ assertEquals("tss:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.SECOND, "Timezone")));
+ assertEquals("tsm:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "Timezone")));
+ assertEquals("tsu:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "Timezone")));
+ assertEquals("tsn:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.NANOSECOND, "Timezone")));
+ assertEquals("+us:1,1,1", Format.asString(new ArrowType.Union(UnionMode.Sparse, new int[] { 1, 1, 1 })));
+ assertEquals("+ud:1,1,1", Format.asString(new ArrowType.Union(UnionMode.Dense, new int[] { 1, 1, 1 })));
+ assertEquals("u", Format.asString(new ArrowType.Utf8()));
+
+ assertThrows(UnsupportedOperationException.class, () -> Format.asString(new ArrowType.Int(1, true)));
+ assertThrows(UnsupportedOperationException.class, () -> Format.asString(new ArrowType.Time(TimeUnit.SECOND, 1)));
+ assertThrows(UnsupportedOperationException.class,
+ () -> Format.asString(new ArrowType.Time(TimeUnit.MILLISECOND, 64)));
+ }
+
+ @Test
+ public void testAsType() throws IllegalStateException, NumberFormatException, UnsupportedOperationException {
+ assertTrue(Format.asType("n", 0L) instanceof ArrowType.Null);
+ assertTrue(Format.asType("b", 0L) instanceof ArrowType.Bool);
+ assertEquals(new ArrowType.Int(Byte.SIZE, true), Format.asType("c", 0L));
+ assertEquals(new ArrowType.Int(Byte.SIZE, false), Format.asType("C", 0L));
+ assertEquals(new ArrowType.Int(Short.SIZE, true), Format.asType("s", 0L));
+ assertEquals(new ArrowType.Int(Short.SIZE, false), Format.asType("S", 0L));
+ assertEquals(new ArrowType.Int(Integer.SIZE, true), Format.asType("i", 0L));
+ assertEquals(new ArrowType.Int(Integer.SIZE, false), Format.asType("I", 0L));
+ assertEquals(new ArrowType.Int(Long.SIZE, true), Format.asType("l", 0L));
+ assertEquals(new ArrowType.Int(Long.SIZE, false), Format.asType("L", 0L));
+ assertEquals(new ArrowType.FloatingPoint(FloatingPointPrecision.HALF), Format.asType("e", 0L));
+ assertEquals(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), Format.asType("f", 0L));
+ assertEquals(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), Format.asType("g", 0L));
+ assertTrue(Format.asType("z", 0L) instanceof ArrowType.Binary);
+ assertTrue(Format.asType("Z", 0L) instanceof ArrowType.LargeBinary);
+ assertTrue(Format.asType("u", 0L) instanceof ArrowType.Utf8);
+ assertTrue(Format.asType("U", 0L) instanceof ArrowType.LargeUtf8);
+ assertEquals(new ArrowType.Date(DateUnit.DAY), Format.asType("tdD", 0L));
+ assertEquals(new ArrowType.Date(DateUnit.MILLISECOND), Format.asType("tdm", 0L));
+ assertEquals(new ArrowType.Time(TimeUnit.SECOND, Integer.SIZE), Format.asType("tts", 0L));
+ assertEquals(new ArrowType.Time(TimeUnit.MILLISECOND, Integer.SIZE), Format.asType("ttm", 0L));
+ assertEquals(new ArrowType.Time(TimeUnit.MICROSECOND, Long.SIZE), Format.asType("ttu", 0L));
+ assertEquals(new ArrowType.Time(TimeUnit.NANOSECOND, Long.SIZE), Format.asType("ttn", 0L));
+ assertEquals(new ArrowType.Duration(TimeUnit.SECOND), Format.asType("tDs", 0L));
+ assertEquals(new ArrowType.Duration(TimeUnit.MILLISECOND), Format.asType("tDm", 0L));
+ assertEquals(new ArrowType.Duration(TimeUnit.MICROSECOND), Format.asType("tDu", 0L));
+ assertEquals(new ArrowType.Duration(TimeUnit.NANOSECOND), Format.asType("tDn", 0L));
+ assertEquals(new ArrowType.Interval(IntervalUnit.YEAR_MONTH), Format.asType("tiM", 0L));
+ assertEquals(new ArrowType.Interval(IntervalUnit.DAY_TIME), Format.asType("tiD", 0L));
+ assertTrue(Format.asType("+l", 0L) instanceof ArrowType.List);
+ assertTrue(Format.asType("+L", 0L) instanceof ArrowType.LargeList);
+ assertTrue(Format.asType("+s", 0L) instanceof ArrowType.Struct);
+ assertEquals(new ArrowType.Map(false), Format.asType("+m", 0L));
+ assertEquals(new ArrowType.Map(true), Format.asType("+m", Flags.ARROW_FLAG_MAP_KEYS_SORTED));
+ assertEquals(new ArrowType.Decimal(1, 1, 128), Format.asType("d:1,1", 0L));
+ assertEquals(new ArrowType.Decimal(1, 1, 1), Format.asType("d:1,1,1", 0L));
+ assertEquals(new ArrowType.Decimal(9, 1, 1), Format.asType("d:9,1,1", 0L));
+ assertEquals(new ArrowType.FixedSizeBinary(1), Format.asType("w:1", 0L));
+ assertEquals(new ArrowType.FixedSizeList(3), Format.asType("+w:3", 0L));
+ assertEquals(new ArrowType.Union(UnionMode.Dense, new int[] { 1, 1, 1 }), Format.asType("+ud:1,1,1", 0L));
+ assertEquals(new ArrowType.Union(UnionMode.Sparse, new int[] { 1, 1, 1 }), Format.asType("+us:1,1,1", 0L));
+ assertEquals(new ArrowType.Timestamp(TimeUnit.SECOND, "Timezone"), Format.asType("tss:Timezone", 0L));
+ assertEquals(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "Timezone"), Format.asType("tsm:Timezone", 0L));
+ assertEquals(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "Timezone"), Format.asType("tsu:Timezone", 0L));
+ assertEquals(new ArrowType.Timestamp(TimeUnit.NANOSECOND, "Timezone"), Format.asType("tsn:Timezone", 0L));
+
+ assertThrows(UnsupportedOperationException.class, () -> Format.asType("Format", 0L));
+ assertThrows(UnsupportedOperationException.class, () -> Format.asType(":", 0L));
+ assertThrows(NumberFormatException.class, () -> Format.asType("w:1,2,3", 0L));
+ }
+}
diff --git a/src/arrow/java/c/src/test/java/org/apache/arrow/c/MetadataTest.java b/src/arrow/java/c/src/test/java/org/apache/arrow/c/MetadataTest.java
new file mode 100644
index 000000000..1d9703b1a
--- /dev/null
+++ b/src/arrow/java/c/src/test/java/org/apache/arrow/c/MetadataTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.c.Metadata;
+import org.apache.arrow.c.NativeUtil;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.LargeMemoryUtil;
+import org.apache.arrow.memory.util.MemoryUtil;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+public class MetadataTest {
+ private RootAllocator allocator = null;
+
+ private static Map<String, String> metadata;
+ private static byte[] encoded;
+
+ @BeforeAll
+ static void beforeAll() {
+ metadata = new HashMap<>();
+ metadata.put("key1", "");
+ metadata.put("key2", "bar");
+
+ if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) {
+ encoded = new byte[] { 2, 0, 0, 0, 4, 0, 0, 0, 'k', 'e', 'y', '1', 0, 0, 0, 0, 4, 0, 0, 0, 'k', 'e', 'y', '2', 3,
+ 0, 0, 0, 'b', 'a', 'r' };
+ } else {
+ encoded = new byte[] { 0, 0, 0, 2, 0, 0, 0, 4, 'k', 'e', 'y', '1', 0, 0, 0, 0, 0, 0, 0, 4, 'k', 'e', 'y', '2', 0,
+ 0, 0, 3, 'b', 'a', 'r' };
+ }
+ }
+
+ @BeforeEach
+ public void setUp() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @AfterEach
+ public void tearDown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testEncode() {
+ try (ArrowBuf buffer = Metadata.encode(allocator, metadata)) {
+ int totalSize = LargeMemoryUtil.checkedCastToInt(buffer.readableBytes());
+ ByteBuffer reader = MemoryUtil.directBuffer(buffer.memoryAddress(), totalSize).order(ByteOrder.nativeOrder());
+ byte[] result = new byte[totalSize];
+ reader.get(result);
+ assertArrayEquals(encoded, result);
+ }
+ }
+
+ @Test
+ public void testDecode() {
+ try (ArrowBuf buffer = allocator.buffer(31)) {
+ buffer.setBytes(0, encoded);
+ Map<String, String> decoded = Metadata.decode(buffer.memoryAddress());
+ assertNotNull(decoded);
+ assertEquals(metadata, decoded);
+ }
+ }
+
+ @Test
+ public void testEncodeEmpty() {
+ Map<String, String> metadata = new HashMap<>();
+ try (ArrowBuf encoded = Metadata.encode(allocator, metadata)) {
+ assertNull(encoded);
+ }
+ }
+
+ @Test
+ public void testDecodeEmpty() {
+ Map<String, String> decoded = Metadata.decode(NativeUtil.NULL);
+ assertNull(decoded);
+ }
+
+}
diff --git a/src/arrow/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java b/src/arrow/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java
new file mode 100644
index 000000000..f46a0128c
--- /dev/null
+++ b/src/arrow/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.apache.arrow.c.NativeUtil;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.LargeMemoryUtil;
+import org.apache.arrow.memory.util.MemoryUtil;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+public class NativeUtilTest {
+
+ private RootAllocator allocator = null;
+
+ @BeforeEach
+ public void setUp() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @AfterEach
+ public void tearDown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testString() {
+ String javaString = "abc";
+ byte[] nativeString = new byte[] { 97, 98, 99, 0 };
+ try (ArrowBuf buffer = NativeUtil.toNativeString(allocator, javaString)) {
+ int totalSize = LargeMemoryUtil.checkedCastToInt(buffer.readableBytes());
+ ByteBuffer reader = MemoryUtil.directBuffer(buffer.memoryAddress(), totalSize).order(ByteOrder.nativeOrder());
+ byte[] result = new byte[totalSize];
+ reader.get(result);
+ assertArrayEquals(nativeString, result);
+
+ assertEquals(javaString, NativeUtil.toJavaString(buffer.memoryAddress()));
+ }
+ }
+
+ @Test
+ public void testToJavaArray() {
+ long[] nativeArray = new long[] { 1, 2, 3 };
+ try (ArrowBuf buffer = allocator.buffer(Long.BYTES * nativeArray.length, null)) {
+ for (long value : nativeArray) {
+ buffer.writeLong(value);
+ }
+ long[] actual = NativeUtil.toJavaArray(buffer.memoryAddress(), nativeArray.length);
+ assertArrayEquals(nativeArray, actual);
+ }
+ }
+
+ @Test
+ public void testToZeroJavaArray() {
+ long[] actual = NativeUtil.toJavaArray(0xDEADBEEF, 0);
+ assertEquals(0, actual.length);
+ }
+
+}
diff --git a/src/arrow/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java b/src/arrow/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
new file mode 100644
index 000000000..059ca3284
--- /dev/null
+++ b/src/arrow/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
@@ -0,0 +1,795 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.c;
+
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.c.ArrowArray;
+import org.apache.arrow.c.ArrowSchema;
+import org.apache.arrow.c.Data;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.UnionMapWriter;
+import org.apache.arrow.vector.holders.IntervalDayHolder;
+import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+public class RoundtripTest {
+ private static final String EMPTY_SCHEMA_PATH = "";
+ private RootAllocator allocator = null;
+
+ @BeforeEach
+ public void setUp() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @AfterEach
+ public void tearDown() {
+ allocator.close();
+ }
+
+ FieldVector vectorRoundtrip(FieldVector vector) {
+ // Consumer allocates empty structures
+ try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator);
+ ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) {
+
+ // Producer creates structures from existing memory pointers
+ try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress());
+ ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) {
+ // Producer exports vector into the C Data Interface structures
+ Data.exportVector(allocator, vector, null, arrowArray, arrowSchema);
+ }
+
+ // Consumer imports vector
+ return Data.importVector(allocator, consumerArrowArray, consumerArrowSchema, null);
+ }
+ }
+
+ VectorSchemaRoot vectorSchemaRootRoundtrip(VectorSchemaRoot root) {
+ // Consumer allocates empty structures
+ try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator);
+ ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) {
+
+ // Producer creates structures from existing memory pointers
+ try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress());
+ ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) {
+ // Producer exports vector into the C Data Interface structures
+ Data.exportVectorSchemaRoot(allocator, root, null, arrowArray, arrowSchema);
+ }
+
+ // Consumer imports vector
+ return Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null);
+ }
+ }
+
+ boolean roundtrip(FieldVector vector, Class<?> clazz) {
+ try (ValueVector imported = vectorRoundtrip(vector)) {
+ assertTrue(clazz.isInstance(imported), String.format("expected %s but was %s", clazz, imported.getClass()));
+ return VectorEqualsVisitor.vectorEquals(vector, imported);
+ }
+ }
+
+ @Test
+ public void testBitVector() {
+ BitVector imported;
+
+ try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(1024);
+ vector.setValueCount(1024);
+
+ // Put and set a few values
+ vector.set(0, 1);
+ vector.set(1, 0);
+ vector.set(100, 0);
+ vector.set(1022, 1);
+
+ vector.setValueCount(1024);
+
+ imported = (BitVector) vectorRoundtrip(vector);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported));
+ }
+
+ assertEquals(1, imported.get(0));
+ assertEquals(0, imported.get(1));
+ assertEquals(0, imported.get(100));
+ assertEquals(1, imported.get(1022));
+ assertEquals(1020, imported.getNullCount());
+ imported.close();
+ }
+
+ @Test
+ public void testIntVector() {
+ IntVector imported;
+ try (final IntVector vector = new IntVector("v", allocator)) {
+ setVector(vector, 1, 2, 3, null);
+ imported = (IntVector) vectorRoundtrip(vector);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported));
+ }
+ assertEquals(1, imported.get(0));
+ assertEquals(2, imported.get(1));
+ assertEquals(3, imported.get(2));
+ assertEquals(4, imported.getValueCount());
+ assertEquals(1, imported.getNullCount());
+ imported.close();
+ }
+
+ @Test
+ public void testBigIntVector() {
+ BigIntVector imported;
+ try (final BigIntVector vector = new BigIntVector("v", allocator)) {
+ setVector(vector, 1L, 2L, 3L, null);
+ imported = (BigIntVector) vectorRoundtrip(vector);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported));
+ }
+ assertEquals(1, imported.get(0));
+ assertEquals(2, imported.get(1));
+ assertEquals(3, imported.get(2));
+ assertEquals(4, imported.getValueCount());
+ assertEquals(1, imported.getNullCount());
+ imported.close();
+ }
+
+ @Test
+ public void testDateDayVector() {
+ DateDayVector imported;
+ try (final DateDayVector vector = new DateDayVector("v", allocator)) {
+ setVector(vector, 1, 2, 3, null);
+ imported = (DateDayVector) vectorRoundtrip(vector);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported));
+ }
+ assertEquals(1, imported.get(0));
+ assertEquals(2, imported.get(1));
+ assertEquals(3, imported.get(2));
+ assertEquals(4, imported.getValueCount());
+ assertEquals(1, imported.getNullCount());
+ imported.close();
+ }
+
+ @Test
+ public void testDateMilliVector() {
+ DateMilliVector imported;
+ try (final DateMilliVector vector = new DateMilliVector("v", allocator)) {
+ setVector(vector, 1L, 2L, 3L, null);
+ imported = (DateMilliVector) vectorRoundtrip(vector);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported));
+ }
+ assertEquals(1, imported.get(0));
+ assertEquals(2, imported.get(1));
+ assertEquals(3, imported.get(2));
+ assertEquals(4, imported.getValueCount());
+ assertEquals(1, imported.getNullCount());
+ imported.close();
+ }
+
+ @Test
+ public void testDecimalVector() {
+ try (final DecimalVector vector = new DecimalVector("v", allocator, 1, 1)) {
+ setVector(vector, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, DecimalVector.class));
+ }
+ }
+
+ @Test
+ public void testDurationVector() {
+ for (TimeUnit unit : TimeUnit.values()) {
+ final FieldType fieldType = FieldType.nullable(new ArrowType.Duration(unit));
+ try (final DurationVector vector = new DurationVector("v", fieldType, allocator)) {
+ setVector(vector, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, DurationVector.class));
+ }
+ }
+ }
+
+ @Test
+ public void testZeroVectorEquals() {
+ try (final ZeroVector vector = new ZeroVector()) {
+ // A ZeroVector is imported as a NullVector
+ assertTrue(roundtrip(vector, NullVector.class));
+ }
+ }
+
+ @Test
+ public void testFixedSizeBinaryVector() {
+ try (final FixedSizeBinaryVector vector = new FixedSizeBinaryVector("v", allocator, 2)) {
+ setVector(vector, new byte[] { 0b0000, 0b0001 }, new byte[] { 0b0010, 0b0011 });
+ assertTrue(roundtrip(vector, FixedSizeBinaryVector.class));
+ }
+ }
+
+ @Test
+ public void testFloat4Vector() {
+ try (final Float4Vector vector = new Float4Vector("v", allocator)) {
+ setVector(vector, 0.1f, 0.2f, 0.3f, null);
+ assertTrue(roundtrip(vector, Float4Vector.class));
+ }
+ }
+
+ @Test
+ public void testFloat8Vector() {
+ try (final Float8Vector vector = new Float8Vector("v", allocator)) {
+ setVector(vector, 0.1d, 0.2d, 0.3d, null);
+ assertTrue(roundtrip(vector, Float8Vector.class));
+ }
+ }
+
+ @Test
+ public void testIntervalDayVector() {
+ try (final IntervalDayVector vector = new IntervalDayVector("v", allocator)) {
+ IntervalDayHolder value = new IntervalDayHolder();
+ value.days = 5;
+ value.milliseconds = 100;
+ setVector(vector, value, null);
+ assertTrue(roundtrip(vector, IntervalDayVector.class));
+ }
+ }
+
+ @Test
+ public void testIntervalYearVector() {
+ try (final IntervalYearVector vector = new IntervalYearVector("v", allocator)) {
+ setVector(vector, 1990, 2000, 2010, 2020, null);
+ assertTrue(roundtrip(vector, IntervalYearVector.class));
+ }
+ }
+
+ @Test
+ public void testSmallIntVector() {
+ try (final SmallIntVector vector = new SmallIntVector("v", allocator)) {
+ setVector(vector, (short) 0, (short) 256, null);
+ assertTrue(roundtrip(vector, SmallIntVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeMicroVector() {
+ try (final TimeMicroVector vector = new TimeMicroVector("v", allocator)) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeMicroVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeMilliVector() {
+ try (final TimeMilliVector vector = new TimeMilliVector("v", allocator)) {
+ setVector(vector, 0, 1, 2, 3, null);
+ assertTrue(roundtrip(vector, TimeMilliVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeNanoVector() {
+ try (final TimeNanoVector vector = new TimeNanoVector("v", allocator)) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeNanoVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeSecVector() {
+ try (final TimeSecVector vector = new TimeSecVector("v", allocator)) {
+ setVector(vector, 0, 1, 2, 3, null);
+ assertTrue(roundtrip(vector, TimeSecVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeStampMicroTZVector() {
+ try (final TimeStampMicroTZVector vector = new TimeStampMicroTZVector("v", allocator, "UTC")) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeStampMicroTZVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeStampMicroVector() {
+ try (final TimeStampMicroVector vector = new TimeStampMicroVector("v", allocator)) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeStampMicroVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeStampMilliTZVector() {
+ try (final TimeStampMilliTZVector vector = new TimeStampMilliTZVector("v", allocator, "UTC")) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeStampMilliTZVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeStampMilliVector() {
+ try (final TimeStampMilliVector vector = new TimeStampMilliVector("v", allocator)) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeStampMilliVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeTimeStampNanoTZVector() {
+ try (final TimeStampNanoTZVector vector = new TimeStampNanoTZVector("v", allocator, "UTC")) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeStampNanoTZVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeStampNanoVector() {
+ try (final TimeStampNanoVector vector = new TimeStampNanoVector("v", allocator)) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeStampNanoVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeStampSecTZVector() {
+ try (final TimeStampSecTZVector vector = new TimeStampSecTZVector("v", allocator, "UTC")) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeStampSecTZVector.class));
+ }
+ }
+
+ @Test
+ public void testTimeStampSecVector() {
+ try (final TimeStampSecVector vector = new TimeStampSecVector("v", allocator)) {
+ setVector(vector, 0L, 1L, 2L, 3L, null);
+ assertTrue(roundtrip(vector, TimeStampSecVector.class));
+ }
+ }
+
+ @Test
+ public void testTinyIntVector() {
+ try (final TinyIntVector vector = new TinyIntVector("v", allocator)) {
+ setVector(vector, (byte) 0, (byte) 1, null);
+ assertTrue(roundtrip(vector, TinyIntVector.class));
+ }
+ }
+
+ @Test
+ public void testUInt1Vector() {
+ try (final UInt1Vector vector = new UInt1Vector("v", allocator)) {
+ setVector(vector, (byte) 0, (byte) 1, null);
+ assertTrue(roundtrip(vector, UInt1Vector.class));
+ }
+ }
+
+ @Test
+ public void testUInt2Vector() {
+ try (final UInt2Vector vector = new UInt2Vector("v", allocator)) {
+ setVector(vector, '0', '1', null);
+ assertTrue(roundtrip(vector, UInt2Vector.class));
+ }
+ }
+
+ @Test
+ public void testUInt4Vector() {
+ try (final UInt4Vector vector = new UInt4Vector("v", allocator)) {
+ setVector(vector, 0, 1, null);
+ assertTrue(roundtrip(vector, UInt4Vector.class));
+ }
+ }
+
+ @Test
+ public void testUInt8Vector() {
+ try (final UInt8Vector vector = new UInt8Vector("v", allocator)) {
+ setVector(vector, 0L, 1L, null);
+ assertTrue(roundtrip(vector, UInt8Vector.class));
+ }
+ }
+
+ @Test
+ public void testVarBinaryVector() {
+ try (final VarBinaryVector vector = new VarBinaryVector("v", allocator)) {
+ setVector(vector, "abc".getBytes(), "def".getBytes(), null);
+ assertTrue(roundtrip(vector, VarBinaryVector.class));
+ }
+ }
+
+ @Test
+ public void testVarCharVector() {
+ try (final VarCharVector vector = new VarCharVector("v", allocator)) {
+ setVector(vector, "abc", "def", null);
+ assertTrue(roundtrip(vector, VarCharVector.class));
+ }
+ }
+
+ @Test
+ public void testLargeVarBinaryVector() {
+ try (final LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ NullableLargeVarBinaryHolder nullHolder = new NullableLargeVarBinaryHolder();
+ nullHolder.isSet = 0;
+
+ NullableLargeVarBinaryHolder binHolder = new NullableLargeVarBinaryHolder();
+ binHolder.isSet = 1;
+
+ String str = "hello world";
+ try (ArrowBuf buf = allocator.buffer(16)) {
+ buf.setBytes(0, str.getBytes());
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
+ vector.setSafe(0, binHolder);
+ vector.setSafe(1, nullHolder);
+
+ assertTrue(roundtrip(vector, LargeVarBinaryVector.class));
+ }
+ }
+ }
+
+ @Test
+ public void testLargeVarCharVector() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) {
+ setVector(vector, "abc", "def", null);
+ assertTrue(roundtrip(vector, LargeVarCharVector.class));
+ }
+ }
+
+ @Test
+ public void testListVector() {
+ try (final ListVector vector = ListVector.empty("v", allocator)) {
+ setVector(vector, Arrays.stream(new int[] { 1, 2 }).boxed().collect(Collectors.toList()),
+ Arrays.stream(new int[] { 3, 4 }).boxed().collect(Collectors.toList()), new ArrayList<Integer>());
+ assertTrue(roundtrip(vector, ListVector.class));
+ }
+ }
+
+ @Test
+ public void testLargeListVector() {
+ try (final LargeListVector vector = LargeListVector.empty("v", allocator)) {
+ setVector(vector, Arrays.stream(new int[] { 1, 2 }).boxed().collect(Collectors.toList()),
+ Arrays.stream(new int[] { 3, 4 }).boxed().collect(Collectors.toList()), new ArrayList<Integer>());
+ assertTrue(roundtrip(vector, LargeListVector.class));
+ }
+ }
+
+ @Test
+ public void testFixedSizeListVector() {
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("v", 2, allocator)) {
+ setVector(vector, Arrays.stream(new int[] { 1, 2 }).boxed().collect(Collectors.toList()),
+ Arrays.stream(new int[] { 3, 4 }).boxed().collect(Collectors.toList()));
+ assertTrue(roundtrip(vector, FixedSizeListVector.class));
+ }
+ }
+
+ @Test
+ public void testMapVector() {
+ int count = 5;
+ try (final MapVector vector = MapVector.empty("v", allocator, false)) {
+ vector.allocateNew();
+ UnionMapWriter mapWriter = vector.getWriter();
+ for (int i = 0; i < count; i++) {
+ mapWriter.startMap();
+ for (int j = 0; j < i + 1; j++) {
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(j);
+ mapWriter.value().integer().writeInt(j);
+ mapWriter.endEntry();
+ }
+ mapWriter.endMap();
+ }
+ mapWriter.setValueCount(count);
+
+ assertTrue(roundtrip(vector, MapVector.class));
+ }
+ }
+
+ @Test
+ public void testUnionVector() {
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 100;
+ uInt4Holder.isSet = 1;
+
+ try (UnionVector vector = UnionVector.empty("v", allocator)) {
+ vector.allocateNew();
+
+ // write some data
+ vector.setType(0, MinorType.UINT4);
+ vector.setSafe(0, uInt4Holder);
+ vector.setType(2, MinorType.UINT4);
+ vector.setSafe(2, uInt4Holder);
+ vector.setValueCount(4);
+
+ assertTrue(roundtrip(vector, UnionVector.class));
+ }
+ }
+
+ @Test
+ public void testStructVector() {
+ try (final StructVector vector = StructVector.empty("v", allocator)) {
+ Map<String, List<Integer>> data = new HashMap<>();
+ data.put("col_1", Arrays.stream(new int[] { 1, 2 }).boxed().collect(Collectors.toList()));
+ data.put("col_2", Arrays.stream(new int[] { 3, 4 }).boxed().collect(Collectors.toList()));
+ setVector(vector, data);
+ assertTrue(roundtrip(vector, StructVector.class));
+ }
+ }
+
+ @Test
+ public void testExtensionTypeVector() {
+ ExtensionTypeRegistry.register(new UuidType());
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new UuidType())));
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ // Fill with data
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+ UuidVector vector = (UuidVector) root.getVector("a");
+ vector.setValueCount(2);
+ vector.set(0, u1);
+ vector.set(1, u2);
+ root.setRowCount(2);
+
+ // Roundtrip (export + import)
+ VectorSchemaRoot importedRoot = vectorSchemaRootRoundtrip(root);
+
+ // Verify correctness
+ assertEquals(root.getSchema(), importedRoot.getSchema());
+
+ final Field field = importedRoot.getSchema().getFields().get(0);
+ final UuidType expectedType = new UuidType();
+ assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName());
+ assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize());
+
+ final UuidVector deserialized = (UuidVector) importedRoot.getFieldVectors().get(0);
+ assertEquals(vector.getValueCount(), deserialized.getValueCount());
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ assertEquals(vector.isNull(i), deserialized.isNull(i));
+ if (!vector.isNull(i)) {
+ assertEquals(vector.getObject(i), deserialized.getObject(i));
+ }
+ }
+
+ importedRoot.close();
+ }
+ }
+
+ @Test
+ public void testVectorSchemaRoot() {
+ VectorSchemaRoot imported;
+
+ // Consumer allocates empty structures
+ try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator);
+ ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) {
+ try (VectorSchemaRoot vsr = createTestVSR()) {
+ // Producer creates structures from existing memory pointers
+ try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress());
+ ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) {
+ // Producer exports vector into the C Data Interface structures
+ Data.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema);
+ }
+ }
+ // Consumer imports vector
+ imported = Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null);
+ }
+
+ // Ensure that imported VectorSchemaRoot is valid even after C Data Interface
+ // structures are closed
+ try (VectorSchemaRoot original = createTestVSR()) {
+ assertTrue(imported.equals(original));
+ }
+ imported.close();
+ }
+
+ @Test
+ public void testSchema() {
+ Field decimalField = new Field("inner1", FieldType.nullable(new ArrowType.Decimal(19, 4, 128)), null);
+ Field strField = new Field("inner2", FieldType.nullable(new ArrowType.Utf8()), null);
+ Field itemField = new Field("col1", FieldType.nullable(new ArrowType.Struct()),
+ Arrays.asList(decimalField, strField));
+ Field intField = new Field("col2", FieldType.nullable(new ArrowType.Int(32, true)), null);
+ Schema schema = new Schema(Arrays.asList(itemField, intField));
+ // Consumer allocates empty ArrowSchema
+ try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator)) {
+ // Producer fills the schema with data
+ try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress())) {
+ Data.exportSchema(allocator, schema, null, arrowSchema);
+ }
+ // Consumer imports schema
+ Schema importedSchema = Data.importSchema(allocator, consumerArrowSchema, null);
+ assertEquals(schema.toJson(), importedSchema.toJson());
+ }
+ }
+
+ @Test
+ public void testImportReleasedArray() {
+ // Consumer allocates empty structures
+ try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator);
+ ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) {
+ // Producer creates structures from existing memory pointers
+ try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress());
+ ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) {
+ // Producer exports vector into the C Data Interface structures
+ try (final NullVector vector = new NullVector()) {
+ Data.exportVector(allocator, vector, null, arrowArray, arrowSchema);
+ }
+ }
+
+ // Release array structure
+ consumerArrowArray.markReleased();
+
+ // Consumer tried to imports vector but fails
+ Exception e = assertThrows(IllegalStateException.class, () -> {
+ Data.importVector(allocator, consumerArrowArray, consumerArrowSchema, null);
+ });
+
+ assertEquals("Cannot import released ArrowArray", e.getMessage());
+ }
+ }
+
+ private VectorSchemaRoot createTestVSR() {
+ BitVector bitVector = new BitVector("boolean", allocator);
+
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key", "value");
+ FieldType fieldType = new FieldType(true, ArrowType.Utf8.INSTANCE, null, metadata);
+ VarCharVector varCharVector = new VarCharVector("varchar", fieldType, allocator);
+
+ bitVector.allocateNew();
+ varCharVector.allocateNew();
+ for (int i = 0; i < 10; i++) {
+ bitVector.setSafe(i, i % 2 == 0 ? 0 : 1);
+ varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8));
+ }
+ bitVector.setValueCount(10);
+ varCharVector.setValueCount(10);
+
+ List<Field> fields = Arrays.asList(bitVector.getField(), varCharVector.getField());
+ List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector);
+
+ return new VectorSchemaRoot(fields, vectors);
+ }
+
+ static class UuidType extends ExtensionType {
+
+ @Override
+ public ArrowType storageType() {
+ return new ArrowType.FixedSizeBinary(16);
+ }
+
+ @Override
+ public String extensionName() {
+ return "uuid";
+ }
+
+ @Override
+ public boolean extensionEquals(ExtensionType other) {
+ return other instanceof UuidType;
+ }
+
+ @Override
+ public ArrowType deserialize(ArrowType storageType, String serializedData) {
+ if (!storageType.equals(storageType())) {
+ throw new UnsupportedOperationException("Cannot construct UuidType from underlying type " + storageType);
+ }
+ return new UuidType();
+ }
+
+ @Override
+ public String serialize() {
+ return "";
+ }
+
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16));
+ }
+ }
+
+ static class UuidVector extends ExtensionTypeVector<FixedSizeBinaryVector> {
+
+ public UuidVector(String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) {
+ super(name, allocator, underlyingVector);
+ }
+
+ @Override
+ public UUID getObject(int index) {
+ final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index));
+ return new UUID(bb.getLong(), bb.getLong());
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return getUnderlyingVector().hashCode(index, hasher);
+ }
+
+ public void set(int index, UUID uuid) {
+ ByteBuffer bb = ByteBuffer.allocate(16);
+ bb.putLong(uuid.getMostSignificantBits());
+ bb.putLong(uuid.getLeastSignificantBits());
+ getUnderlyingVector().set(index, bb.array());
+ }
+ }
+}
diff --git a/src/arrow/java/compression/pom.xml b/src/arrow/java/compression/pom.xml
new file mode 100644
index 000000000..c2e460380
--- /dev/null
+++ b/src/arrow/java/compression/pom.xml
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ </parent>
+ <artifactId>arrow-compression</artifactId>
+ <name>Arrow Compression</name>
+ <description>(Experimental/Contrib) A library for working with the compression/decompression of Arrow data.</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-unsafe</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <version>1.20</version>
+ </dependency>
+ <dependency>
+ <groupId>com.github.luben</groupId>
+ <artifactId>zstd-jni</artifactId>
+ <version>1.4.9-1</version>
+</dependency>
+ </dependencies>
+</project>
diff --git a/src/arrow/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java b/src/arrow/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
new file mode 100644
index 000000000..867e9f418
--- /dev/null
+++ b/src/arrow/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+
+/**
+ * Default implementation of factory supported LZ4 and ZSTD compression.
+ *
+ * // TODO(ARROW-12115): Rename this class.
+ */
+public class CommonsCompressionFactory implements CompressionCodec.Factory {
+
+ public static final CommonsCompressionFactory INSTANCE = new CommonsCompressionFactory();
+
+ @Override
+ public CompressionCodec createCodec(CompressionUtil.CodecType codecType) {
+ switch (codecType) {
+ case LZ4_FRAME:
+ return new Lz4CompressionCodec();
+ case ZSTD:
+ return new ZstdCompressionCodec();
+ default:
+ throw new IllegalArgumentException("Compression type not supported: " + codecType);
+ }
+ }
+}
diff --git a/src/arrow/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java b/src/arrow/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
new file mode 100644
index 000000000..daa35b7e1
--- /dev/null
+++ b/src/arrow/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compression.AbstractCompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorInputStream;
+import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorOutputStream;
+import org.apache.commons.compress.utils.IOUtils;
+
+/**
+ * Compression codec for the LZ4 algorithm.
+ */
+public class Lz4CompressionCodec extends AbstractCompressionCodec {
+
+ @Override
+ protected ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
+ Preconditions.checkArgument(uncompressedBuffer.writerIndex() <= Integer.MAX_VALUE,
+ "The uncompressed buffer size exceeds the integer limit %s.", Integer.MAX_VALUE);
+
+ byte[] inBytes = new byte[(int) uncompressedBuffer.writerIndex()];
+ uncompressedBuffer.getBytes(/*index=*/0, inBytes);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (InputStream in = new ByteArrayInputStream(inBytes);
+ OutputStream out = new FramedLZ4CompressorOutputStream(baos)) {
+ IOUtils.copy(in, out);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ byte[] outBytes = baos.toByteArray();
+
+ ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length);
+ compressedBuffer.setBytes(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, outBytes);
+ compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length);
+ return compressedBuffer;
+ }
+
+ @Override
+ protected ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
+ Preconditions.checkArgument(compressedBuffer.writerIndex() <= Integer.MAX_VALUE,
+ "The compressed buffer size exceeds the integer limit %s", Integer.MAX_VALUE);
+
+ long decompressedLength = readUncompressedLength(compressedBuffer);
+
+ byte[] inBytes = new byte[(int) (compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH)];
+ compressedBuffer.getBytes(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, inBytes);
+ ByteArrayOutputStream out = new ByteArrayOutputStream((int) decompressedLength);
+ try (InputStream in = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(inBytes))) {
+ IOUtils.copy(in, out);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ byte[] outBytes = out.toByteArray();
+ ArrowBuf decompressedBuffer = allocator.buffer(outBytes.length);
+ decompressedBuffer.setBytes(/*index=*/0, outBytes);
+ return decompressedBuffer;
+ }
+
+ @Override
+ public CompressionUtil.CodecType getCodecType() {
+ return CompressionUtil.CodecType.LZ4_FRAME;
+ }
+}
diff --git a/src/arrow/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java b/src/arrow/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java
new file mode 100644
index 000000000..38717843e
--- /dev/null
+++ b/src/arrow/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.compression.AbstractCompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+
+import com.github.luben.zstd.Zstd;
+
+/**
+ * Compression codec for the ZSTD algorithm.
+ */
+public class ZstdCompressionCodec extends AbstractCompressionCodec {
+
+ @Override
+ protected ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
+ long maxSize = Zstd.compressBound(uncompressedBuffer.writerIndex());
+ long dstSize = CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + maxSize;
+ ArrowBuf compressedBuffer = allocator.buffer(dstSize);
+ long bytesWritten = Zstd.compressUnsafe(
+ compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, dstSize,
+ /*src*/uncompressedBuffer.memoryAddress(), /*srcSize=*/uncompressedBuffer.writerIndex(),
+ /*level=*/3);
+ if (Zstd.isError(bytesWritten)) {
+ compressedBuffer.close();
+ throw new RuntimeException("Error compressing: " + Zstd.getErrorName(bytesWritten));
+ }
+ compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + bytesWritten);
+ return compressedBuffer;
+ }
+
+ @Override
+ protected ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
+ long decompressedLength = readUncompressedLength(compressedBuffer);
+ ArrowBuf uncompressedBuffer = allocator.buffer(decompressedLength);
+ long decompressedSize = Zstd.decompressUnsafe(uncompressedBuffer.memoryAddress(), decompressedLength,
+ /*src=*/compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH,
+ compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
+ if (Zstd.isError(decompressedSize)) {
+ uncompressedBuffer.close();
+ throw new RuntimeException("Error decompressing: " + Zstd.getErrorName(decompressedLength));
+ }
+ if (decompressedLength != decompressedSize) {
+ uncompressedBuffer.close();
+ throw new RuntimeException("Expected != actual decompressed length: " +
+ decompressedLength + " != " + decompressedSize);
+ }
+ uncompressedBuffer.writerIndex(decompressedLength);
+ return uncompressedBuffer;
+ }
+
+ @Override
+ public CompressionUtil.CodecType getCodecType() {
+ return CompressionUtil.CodecType.ZSTD;
+ }
+}
diff --git a/src/arrow/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java b/src/arrow/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java
new file mode 100644
index 000000000..1f6d64d47
--- /dev/null
+++ b/src/arrow/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test cases for {@link CompressionCodec}s.
+ */
+@RunWith(Parameterized.class)
+public class TestCompressionCodec {
+
+ private final CompressionCodec codec;
+
+ private BufferAllocator allocator;
+
+ private final int vectorLength;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() {
+ allocator.close();
+ }
+
+ public TestCompressionCodec(CompressionUtil.CodecType type, int vectorLength, CompressionCodec codec) {
+ this.codec = codec;
+ this.vectorLength = vectorLength;
+ }
+
+ @Parameterized.Parameters(name = "codec = {0}, length = {1}")
+ public static Collection<Object[]> getCodecs() {
+ List<Object[]> params = new ArrayList<>();
+
+ int[] lengths = new int[] {10, 100, 1000};
+ for (int len : lengths) {
+ CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE;
+ params.add(new Object[]{dumbCodec.getCodecType(), len, dumbCodec});
+
+ CompressionCodec lz4Codec = new Lz4CompressionCodec();
+ params.add(new Object[]{lz4Codec.getCodecType(), len, lz4Codec});
+
+ CompressionCodec zstdCodec = new ZstdCompressionCodec();
+ params.add(new Object[]{zstdCodec.getCodecType(), len, zstdCodec});
+
+ }
+ return params;
+ }
+
+ private List<ArrowBuf> compressBuffers(List<ArrowBuf> inputBuffers) {
+ List<ArrowBuf> outputBuffers = new ArrayList<>(inputBuffers.size());
+ for (ArrowBuf buf : inputBuffers) {
+ outputBuffers.add(codec.compress(allocator, buf));
+ }
+ return outputBuffers;
+ }
+
+ private List<ArrowBuf> deCompressBuffers(List<ArrowBuf> inputBuffers) {
+ List<ArrowBuf> outputBuffers = new ArrayList<>(inputBuffers.size());
+ for (ArrowBuf buf : inputBuffers) {
+ outputBuffers.add(codec.decompress(allocator, buf));
+ }
+ return outputBuffers;
+ }
+
+ @Test
+ public void testCompressFixedWidthBuffers() throws Exception {
+ // prepare vector to compress
+ IntVector origVec = new IntVector("vec", allocator);
+ origVec.allocateNew(vectorLength);
+ for (int i = 0; i < vectorLength; i++) {
+ if (i % 10 == 0) {
+ origVec.setNull(i);
+ } else {
+ origVec.set(i, i);
+ }
+ }
+ origVec.setValueCount(vectorLength);
+ int nullCount = origVec.getNullCount();
+
+ // compress & decompress
+ List<ArrowBuf> origBuffers = origVec.getFieldBuffers();
+ List<ArrowBuf> compressedBuffers = compressBuffers(origBuffers);
+ List<ArrowBuf> decompressedBuffers = deCompressBuffers(compressedBuffers);
+
+ assertEquals(2, decompressedBuffers.size());
+
+ // orchestrate new vector
+ IntVector newVec = new IntVector("new vec", allocator);
+ newVec.loadFieldBuffers(new ArrowFieldNode(vectorLength, nullCount), decompressedBuffers);
+
+ // verify new vector
+ assertEquals(vectorLength, newVec.getValueCount());
+ for (int i = 0; i < vectorLength; i++) {
+ if (i % 10 == 0) {
+ assertTrue(newVec.isNull(i));
+ } else {
+ assertEquals(i, newVec.get(i));
+ }
+ }
+
+ newVec.close();
+ AutoCloseables.close(decompressedBuffers);
+ }
+
+ @Test
+ public void testCompressVariableWidthBuffers() throws Exception {
+ // prepare vector to compress
+ VarCharVector origVec = new VarCharVector("vec", allocator);
+ origVec.allocateNew();
+ for (int i = 0; i < vectorLength; i++) {
+ if (i % 10 == 0) {
+ origVec.setNull(i);
+ } else {
+ origVec.setSafe(i, String.valueOf(i).getBytes());
+ }
+ }
+ origVec.setValueCount(vectorLength);
+ int nullCount = origVec.getNullCount();
+
+ // compress & decompress
+ List<ArrowBuf> origBuffers = origVec.getFieldBuffers();
+ List<ArrowBuf> compressedBuffers = compressBuffers(origBuffers);
+ List<ArrowBuf> decompressedBuffers = deCompressBuffers(compressedBuffers);
+
+ assertEquals(3, decompressedBuffers.size());
+
+ // orchestrate new vector
+ VarCharVector newVec = new VarCharVector("new vec", allocator);
+ newVec.loadFieldBuffers(new ArrowFieldNode(vectorLength, nullCount), decompressedBuffers);
+
+ // verify new vector
+ assertEquals(vectorLength, newVec.getValueCount());
+ for (int i = 0; i < vectorLength; i++) {
+ if (i % 10 == 0) {
+ assertTrue(newVec.isNull(i));
+ } else {
+ assertArrayEquals(String.valueOf(i).getBytes(), newVec.get(i));
+ }
+ }
+
+ newVec.close();
+ AutoCloseables.close(decompressedBuffers);
+ }
+
+ @Test
+ public void testEmptyBuffer() throws Exception {
+ final VarBinaryVector origVec = new VarBinaryVector("vec", allocator);
+
+ origVec.allocateNew(vectorLength);
+
+ // Do not set any values (all missing)
+ origVec.setValueCount(vectorLength);
+
+ final List<ArrowBuf> origBuffers = origVec.getFieldBuffers();
+ final List<ArrowBuf> compressedBuffers = compressBuffers(origBuffers);
+ final List<ArrowBuf> decompressedBuffers = deCompressBuffers(compressedBuffers);
+
+ // orchestrate new vector
+ VarBinaryVector newVec = new VarBinaryVector("new vec", allocator);
+ newVec.loadFieldBuffers(new ArrowFieldNode(vectorLength, vectorLength), decompressedBuffers);
+
+ // verify new vector
+ assertEquals(vectorLength, newVec.getValueCount());
+ for (int i = 0; i < vectorLength; i++) {
+ assertTrue(newVec.isNull(i));
+ }
+
+ newVec.close();
+ AutoCloseables.close(decompressedBuffers);
+ }
+}
diff --git a/src/arrow/java/dataset/CMakeLists.txt b/src/arrow/java/dataset/CMakeLists.txt
new file mode 100644
index 000000000..07e2d0ae8
--- /dev/null
+++ b/src/arrow/java/dataset/CMakeLists.txt
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_dataset_java
+#
+
+# Headers: top level
+
+project(arrow_dataset_java)
+
+# Find java/jni
+include(FindJava)
+include(UseJava)
+include(FindJNI)
+
+message("generating headers to ${JNI_HEADERS_DIR}")
+
+add_jar(arrow_dataset_java
+ src/main/java/org/apache/arrow/dataset/jni/JniLoader.java
+ src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java
+ src/main/java/org/apache/arrow/dataset/jni/NativeRecordBatchHandle.java
+ src/main/java/org/apache/arrow/dataset/file/JniWrapper.java
+ src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java
+ src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java
+ GENERATE_NATIVE_HEADERS
+ arrow_dataset_java-native
+ DESTINATION
+ ${JNI_HEADERS_DIR})
diff --git a/src/arrow/java/dataset/pom.xml b/src/arrow/java/dataset/pom.xml
new file mode 100644
index 000000000..0a393c50a
--- /dev/null
+++ b/src/arrow/java/dataset/pom.xml
@@ -0,0 +1,134 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>arrow-java-root</artifactId>
+ <groupId>org.apache.arrow</groupId>
+ <version>6.0.1</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>arrow-dataset</artifactId>
+ <name>Arrow Java Dataset</name>
+ <description>Java implementation of Arrow Dataset API/Framework</description>
+ <packaging>jar</packaging>
+ <properties>
+ <arrow.cpp.build.dir>../../../cpp/release-build/</arrow.cpp.build.dir>
+ <protobuf.version>2.5.0</protobuf.version>
+ <parquet.version>1.11.0</parquet.version>
+ <avro.version>1.8.2</avro.version>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-avro</artifactId>
+ <version>${parquet.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-hadoop</artifactId>
+ <version>${parquet.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${dep.hadoop.version}</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ <version>${avro.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>${dep.guava.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <resources>
+ <resource>
+ <directory>${arrow.cpp.build.dir}</directory>
+ <includes>
+ <include>**/libarrow_dataset_jni.*</include>
+ </includes>
+ </resource>
+ </resources>
+
+ <plugins>
+ <plugin>
+ <groupId>org.xolstice.maven.plugins</groupId>
+ <artifactId>protobuf-maven-plugin</artifactId>
+ <version>0.5.1</version>
+ <configuration>
+ <protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}
+ </protocArtifact>
+ <protoSourceRoot>../../cpp/src/jni/dataset/proto</protoSourceRoot>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>compile</goal>
+ <goal>test-compile</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java
new file mode 100644
index 000000000..e341d46be
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.file;
+
+/**
+ * File format definitions.
+ */
+public enum FileFormat {
+ PARQUET(0),
+ NONE(-1);
+
+ private int id;
+
+ FileFormat(int id) {
+ this.id = id;
+ }
+
+ public int id() {
+ return id;
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java
new file mode 100644
index 000000000..1268d11fe
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.file;
+
+import org.apache.arrow.dataset.jni.NativeDatasetFactory;
+import org.apache.arrow.dataset.jni.NativeMemoryPool;
+import org.apache.arrow.memory.BufferAllocator;
+
+/**
+ * Java binding of the C++ FileSystemDatasetFactory.
+ */
+public class FileSystemDatasetFactory extends NativeDatasetFactory {
+
+ public FileSystemDatasetFactory(BufferAllocator allocator, NativeMemoryPool memoryPool, FileFormat format,
+ String uri) {
+ super(allocator, memoryPool, createNative(format, uri));
+ }
+
+ private static long createNative(FileFormat format, String uri) {
+ return JniWrapper.get().makeFileSystemDatasetFactory(uri, format.id());
+ }
+
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java
new file mode 100644
index 000000000..1af307aac
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.file;
+
+import org.apache.arrow.dataset.jni.JniLoader;
+
+/**
+ * JniWrapper for filesystem based {@link org.apache.arrow.dataset.source.Dataset} implementations.
+ */
+public class JniWrapper {
+
+ private static final JniWrapper INSTANCE = new JniWrapper();
+
+ public static JniWrapper get() {
+ return INSTANCE;
+ }
+
+ private JniWrapper() {
+ JniLoader.get().ensureLoaded();
+ }
+
+ /**
+ * Create FileSystemDatasetFactory and return its native pointer. The pointer is pointing to a
+ * intermediate shared_ptr of the factory instance.
+ * @param uri file uri to read
+ * @param fileFormat file format ID
+ * @return the native pointer of the arrow::dataset::FileSystemDatasetFactory instance.
+ * @see FileFormat
+ */
+ public native long makeFileSystemDatasetFactory(String uri, int fileFormat);
+
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/DirectReservationListener.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/DirectReservationListener.java
new file mode 100644
index 000000000..72a1cadcf
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/DirectReservationListener.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.arrow.util.VisibleForTesting;
+
+/**
+ * Reserving Java direct memory bytes from java.nio.Bits. Used by Java Dataset API's C++ memory
+ * pool implementation. This makes memory allocated by the pool to be controlled by JVM option
+ * "-XX:MaxDirectMemorySize".
+ */
+public class DirectReservationListener implements ReservationListener {
+ private final Method methodReserve;
+ private final Method methodUnreserve;
+
+ private DirectReservationListener() {
+ try {
+ final Class<?> classBits = Class.forName("java.nio.Bits");
+ methodReserve = classBits.getDeclaredMethod("reserveMemory", long.class, int.class);
+ methodReserve.setAccessible(true);
+ methodUnreserve = classBits.getDeclaredMethod("unreserveMemory", long.class, int.class);
+ methodUnreserve.setAccessible(true);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static final DirectReservationListener INSTANCE = new DirectReservationListener();
+
+ public static DirectReservationListener instance() {
+ return INSTANCE;
+ }
+
+ /**
+ * Reserve bytes by invoking java.nio.java.Bitjava.nio.Bitss#reserveMemory.
+ */
+ @Override
+ public void reserve(long size) {
+ try {
+ if (size > Integer.MAX_VALUE) {
+ throw new IllegalArgumentException("reserve size should not be larger than Integer.MAX_VALUE (0x7fffffff)");
+ }
+ methodReserve.invoke(null, (int) size, (int) size);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Unreserve bytes by invoking java.nio.java.Bitjava.nio.Bitss#unreserveMemory.
+ */
+ @Override
+ public void unreserve(long size) {
+ try {
+ if (size > Integer.MAX_VALUE) {
+ throw new IllegalArgumentException("unreserve size should not be larger than Integer.MAX_VALUE (0x7fffffff)");
+ }
+ methodUnreserve.invoke(null, (int) size, (int) size);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Get current reservation of jVM direct memory. Visible for testing.
+ */
+ @VisibleForTesting
+ public long getCurrentDirectMemReservation() {
+ try {
+ final Class<?> classBits = Class.forName("java.nio.Bits");
+ final Field f = classBits.getDeclaredField("reservedMemory");
+ f.setAccessible(true);
+ return ((AtomicLong) f.get(null)).get();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniLoader.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniLoader.java
new file mode 100644
index 000000000..15ce5448b
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniLoader.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * The JniLoader for Dataset API's native implementation.
+ */
+public final class JniLoader {
+
+ private static final JniLoader INSTANCE = new JniLoader(Collections.singletonList("arrow_dataset_jni"));
+
+ public static JniLoader get() {
+ return INSTANCE;
+ }
+
+ private final Set<String> librariesToLoad;
+
+ private JniLoader(List<String> libraryNames) {
+ librariesToLoad = new HashSet<>(libraryNames);
+ }
+
+ private boolean finished() {
+ return librariesToLoad.isEmpty();
+ }
+
+ /**
+ * If required JNI libraries are not loaded, then load them.
+ */
+ public void ensureLoaded() {
+ if (finished()) {
+ return;
+ }
+ loadRemaining();
+ }
+
+ private synchronized void loadRemaining() {
+ // The method is protected by a mutex via synchronized, if more than one thread race to call
+ // loadRemaining, at same time only one will do the actual loading and the others will wait for
+ // the mutex to be acquired then check on the remaining list: if there are libraries that were not
+ // successfully loaded then the mutex owner will try to load them again.
+ if (finished()) {
+ return;
+ }
+ List<String> libs = new ArrayList<>(librariesToLoad);
+ for (String lib : libs) {
+ load(lib);
+ librariesToLoad.remove(lib);
+ }
+ }
+
+ private void load(String name) {
+ final String libraryToLoad = System.mapLibraryName(name);
+ try {
+ File temp = File.createTempFile("jnilib-", ".tmp", new File(System.getProperty("java.io.tmpdir")));
+ try (final InputStream is
+ = JniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) {
+ if (is == null) {
+ throw new FileNotFoundException(libraryToLoad);
+ }
+ Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ System.load(temp.getAbsolutePath());
+ }
+ } catch (IOException e) {
+ throw new IllegalStateException("error loading native libraries: " + e);
+ }
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java
new file mode 100644
index 000000000..7dd54e764
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+/**
+ * JNI wrapper for Dataset API's native implementation.
+ */
+public class JniWrapper {
+
+ private static final JniWrapper INSTANCE = new JniWrapper();
+
+ public static JniWrapper get() {
+ return INSTANCE;
+ }
+
+ private JniWrapper() {
+ JniLoader.get().ensureLoaded();
+ }
+
+ /**
+ * Release the DatasetFactory by destroying its reference held by JNI wrapper.
+ *
+ * @param datasetFactoryId the native pointer of the arrow::dataset::DatasetFactory instance.
+ */
+ public native void closeDatasetFactory(long datasetFactoryId);
+
+ /**
+ * Get a serialized schema from native instance of a DatasetFactory.
+ *
+ * @param datasetFactoryId the native pointer of the arrow::dataset::DatasetFactory instance.
+ * @return the serialized schema
+ * @see org.apache.arrow.vector.types.pojo.Schema
+ */
+ public native byte[] inspectSchema(long datasetFactoryId);
+
+ /**
+ * Create Dataset from a DatasetFactory and get the native pointer of the Dataset.
+ *
+ * @param datasetFactoryId the native pointer of the arrow::dataset::DatasetFactory instance.
+ * @param schema the predefined schema of the resulting Dataset.
+ * @return the native pointer of the arrow::dataset::Dataset instance.
+ */
+ public native long createDataset(long datasetFactoryId, byte[] schema);
+
+ /**
+ * Release the Dataset by destroying its reference held by JNI wrapper.
+ *
+ * @param datasetId the native pointer of the arrow::dataset::Dataset instance.
+ */
+ public native void closeDataset(long datasetId);
+
+ /**
+ * Create Scanner from a Dataset and get the native pointer of the Dataset.
+ * @param datasetId the native pointer of the arrow::dataset::Dataset instance.
+ * @param columns desired column names.
+ * Columns not in this list will not be emitted when performing scan operation. Null equals
+ * to "all columns".
+ * @param batchSize batch size of scanned record batches.
+ * @param memoryPool identifier of memory pool used in the native scanner.
+ * @return the native pointer of the arrow::dataset::Scanner instance.
+ */
+ public native long createScanner(long datasetId, String[] columns, long batchSize, long memoryPool);
+
+ /**
+ * Get a serialized schema from native instance of a Scanner.
+ *
+ * @param scannerId the native pointer of the arrow::dataset::Scanner instance.
+ * @return the serialized schema
+ * @see org.apache.arrow.vector.types.pojo.Schema
+ */
+ public native byte[] getSchemaFromScanner(long scannerId);
+
+ /**
+ * Release the Scanner by destroying its reference held by JNI wrapper.
+ * @param scannerId the native pointer of the arrow::dataset::Scanner instance.
+ */
+ public native void closeScanner(long scannerId);
+
+ /**
+ * Read next record batch from the specified scanner.
+ * @param scannerId the native pointer of the arrow::dataset::Scanner instance.
+ * @return an instance of {@link NativeRecordBatchHandle} describing the overall layout of the native record batch.
+ */
+ public native NativeRecordBatchHandle nextRecordBatch(long scannerId);
+
+ /**
+ * Release the Buffer by destroying its reference held by JNI wrapper.
+ * @param bufferId the native pointer of the arrow::Buffer instance.
+ */
+ public native void releaseBuffer(long bufferId);
+
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeContext.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeContext.java
new file mode 100644
index 000000000..7f6dfbc02
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeContext.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import org.apache.arrow.memory.BufferAllocator;
+
+/**
+ * Context for relevant classes of NativeDataset.
+ */
+public class NativeContext {
+ private final BufferAllocator allocator;
+ private final NativeMemoryPool memoryPool;
+
+ /**
+ * Constructor.
+ *
+ * @param allocator The allocator in use.
+ * @param memoryPool Native memory pool.
+ */
+ public NativeContext(BufferAllocator allocator, NativeMemoryPool memoryPool) {
+ this.allocator = allocator;
+ this.memoryPool = memoryPool;
+ }
+
+ /**
+ * Returns the allocator which is in use.
+ */
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ /**
+ * Returns the native memory pool.
+ */
+ public NativeMemoryPool getMemoryPool() {
+ return memoryPool;
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java
new file mode 100644
index 000000000..30ff1a930
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import org.apache.arrow.dataset.scanner.ScanOptions;
+import org.apache.arrow.dataset.source.Dataset;
+
+/**
+ * Native implementation of {@link Dataset}.
+ */
+public class NativeDataset implements Dataset {
+
+ private final NativeContext context;
+ private final long datasetId;
+
+ private boolean closed = false;
+
+ public NativeDataset(NativeContext context, long datasetId) {
+ this.context = context;
+ this.datasetId = datasetId;
+ }
+
+ @Override
+ public synchronized NativeScanner newScan(ScanOptions options) {
+ if (closed) {
+ throw new NativeInstanceReleasedException();
+ }
+ long scannerId = JniWrapper.get().createScanner(datasetId, options.getColumns().orElse(null),
+ options.getBatchSize(), context.getMemoryPool().getNativeInstanceId());
+ return new NativeScanner(context, scannerId);
+ }
+
+ @Override
+ public synchronized void close() {
+ if (closed) {
+ return;
+ }
+ closed = true;
+ JniWrapper.get().closeDataset(datasetId);
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDatasetFactory.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDatasetFactory.java
new file mode 100644
index 000000000..993d44fa2
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDatasetFactory.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import java.io.IOException;
+
+import org.apache.arrow.dataset.source.DatasetFactory;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.SchemaUtility;
+
+/**
+ * Native implementation of {@link DatasetFactory}.
+ */
+public class NativeDatasetFactory implements DatasetFactory {
+ private final long datasetFactoryId;
+ private final NativeMemoryPool memoryPool;
+ private final BufferAllocator allocator;
+
+ private boolean closed = false;
+
+ /**
+ * Constructor.
+ *
+ * @param allocator a context allocator associated with this factory. Any buffer that will be created natively will
+ * be then bound to this allocator.
+ * @param memoryPool the native memory pool associated with this factory. Any buffer created natively should request
+ * for memory spaces from this memory pool. This is a mapped instance of c++ arrow::MemoryPool.
+ * @param datasetFactoryId an ID, at the same time the native pointer of the underlying native instance of this
+ * factory. Make sure in c++ side the pointer is pointing to the shared pointer wrapping
+ * the actual instance so we could successfully decrease the reference count once
+ * {@link #close} is called.
+ * @see #close()
+ */
+ public NativeDatasetFactory(BufferAllocator allocator, NativeMemoryPool memoryPool, long datasetFactoryId) {
+ this.allocator = allocator;
+ this.memoryPool = memoryPool;
+ this.datasetFactoryId = datasetFactoryId;
+ }
+
+ @Override
+ public Schema inspect() {
+ final byte[] buffer;
+ synchronized (this) {
+ if (closed) {
+ throw new NativeInstanceReleasedException();
+ }
+ buffer = JniWrapper.get().inspectSchema(datasetFactoryId);
+ }
+ try {
+ return SchemaUtility.deserialize(buffer, allocator);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public NativeDataset finish() {
+ return finish(inspect());
+ }
+
+ @Override
+ public NativeDataset finish(Schema schema) {
+ try {
+ byte[] serialized = SchemaUtility.serialize(schema);
+ synchronized (this) {
+ if (closed) {
+ throw new NativeInstanceReleasedException();
+ }
+ return new NativeDataset(new NativeContext(allocator, memoryPool),
+ JniWrapper.get().createDataset(datasetFactoryId, serialized));
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Close this factory by release the pointer of the native instance.
+ */
+ @Override
+ public synchronized void close() {
+ if (closed) {
+ return;
+ }
+ closed = true;
+ JniWrapper.get().closeDatasetFactory(datasetFactoryId);
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeInstanceReleasedException.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeInstanceReleasedException.java
new file mode 100644
index 000000000..3231ca23a
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeInstanceReleasedException.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+/**
+ * Thrown if trying to operate on a native instance that is already released.
+ */
+public class NativeInstanceReleasedException extends RuntimeException {
+ public NativeInstanceReleasedException() {
+ super("Native instance has been released");
+ }
+
+ public NativeInstanceReleasedException(String message) {
+ super(message);
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java
new file mode 100644
index 000000000..83825776b
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+/**
+ * C++ memory pool(arrow::MemoryPool)'s Java mapped instance.
+ */
+public class NativeMemoryPool implements AutoCloseable {
+ private final long nativeInstanceId;
+
+ static {
+ JniLoader.get().ensureLoaded();
+ }
+
+ private NativeMemoryPool(long nativeInstanceId) {
+ this.nativeInstanceId = nativeInstanceId;
+ }
+
+ /**
+ * Get the default memory pool. This will return arrow::default_memory_pool() directly.
+ */
+ public static NativeMemoryPool getDefault() {
+ return new NativeMemoryPool(getDefaultMemoryPool());
+ }
+
+ /**
+ * Create a listenable memory pool (see also: arrow::ReservationListenableMemoryPool) with
+ * a specific listener. All buffers created from the memory pool should take enough reservation
+ * from the listener in advance.
+ */
+ public static NativeMemoryPool createListenable(ReservationListener listener) {
+ return new NativeMemoryPool(createListenableMemoryPool(listener));
+ }
+
+ /**
+ * Return native instance ID of this memory pool.
+ */
+ public long getNativeInstanceId() {
+ return nativeInstanceId;
+ }
+
+ /**
+ * Get current allocated bytes.
+ */
+ public long getBytesAllocated() {
+ return bytesAllocated(nativeInstanceId);
+ }
+
+ @Override
+ public void close() throws Exception {
+ releaseMemoryPool(nativeInstanceId);
+ }
+
+ private static native long getDefaultMemoryPool();
+
+ private static native long createListenableMemoryPool(ReservationListener listener);
+
+ private static native void releaseMemoryPool(long id);
+
+ private static native long bytesAllocated(long id);
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeRecordBatchHandle.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeRecordBatchHandle.java
new file mode 100644
index 000000000..dd90fd1c1
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeRecordBatchHandle.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Hold pointers to a Arrow C++ RecordBatch.
+ */
+public class NativeRecordBatchHandle {
+
+ private final long numRows;
+ private final List<Field> fields;
+ private final List<Buffer> buffers;
+
+ /**
+ * Constructor.
+ *
+ * @param numRows Total row number of the associated RecordBatch
+ * @param fields Metadata of fields
+ * @param buffers Retained Arrow buffers
+ */
+ public NativeRecordBatchHandle(long numRows, Field[] fields, Buffer[] buffers) {
+ this.numRows = numRows;
+ this.fields = Arrays.asList(fields);
+ this.buffers = Arrays.asList(buffers);
+ }
+
+ /**
+ * Returns the total row number of the associated RecordBatch.
+ * @return Total row number of the associated RecordBatch.
+ */
+ public long getNumRows() {
+ return numRows;
+ }
+
+ /**
+ * Returns Metadata of fields.
+ * @return Metadata of fields.
+ */
+ public List<Field> getFields() {
+ return fields;
+ }
+
+ /**
+ * Returns the buffers.
+ * @return Retained Arrow buffers.
+ */
+ public List<Buffer> getBuffers() {
+ return buffers;
+ }
+
+ /**
+ * Field metadata.
+ */
+ public static class Field {
+ public final long length;
+ public final long nullCount;
+
+ public Field(long length, long nullCount) {
+ this.length = length;
+ this.nullCount = nullCount;
+ }
+ }
+
+ /**
+ * Pointers and metadata of the targeted Arrow buffer.
+ */
+ public static class Buffer {
+ public final long nativeInstanceId;
+ public final long memoryAddress;
+ public final long size;
+ public final long capacity;
+
+ /**
+ * Constructor.
+ *
+ * @param nativeInstanceId Native instance's id
+ * @param memoryAddress Memory address of the first byte
+ * @param size Size (in bytes)
+ * @param capacity Capacity (in bytes)
+ */
+ public Buffer(long nativeInstanceId, long memoryAddress, long size, long capacity) {
+ this.nativeInstanceId = nativeInstanceId;
+ this.memoryAddress = memoryAddress;
+ this.size = size;
+ this.capacity = capacity;
+ }
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanTask.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanTask.java
new file mode 100644
index 000000000..14d89c2ee
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanTask.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import org.apache.arrow.dataset.scanner.ScanTask;
+
+/**
+ * Native implementation of {@link ScanTask}. Currently RecordBatches are iterated directly by the scanner
+ * id via {@link JniWrapper}, thus we allow only one-time execution of method {@link #execute()}. If a re-scan
+ * operation is expected, call {@link NativeDataset#newScan} to create a new scanner instance.
+ */
+public class NativeScanTask implements ScanTask {
+ private final NativeScanner scanner;
+
+ /**
+ * Constructor.
+ */
+ public NativeScanTask(NativeScanner scanner) {
+ this.scanner = scanner;
+ }
+
+ @Override
+ public BatchIterator execute() {
+ return scanner.execute();
+ }
+
+ @Override
+ public void close() {
+ scanner.close();
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanner.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanner.java
new file mode 100644
index 000000000..24c298067
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanner.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.NoSuchElementException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.dataset.scanner.ScanTask;
+import org.apache.arrow.dataset.scanner.Scanner;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.BufferLedger;
+import org.apache.arrow.memory.NativeUnderlyingMemory;
+import org.apache.arrow.memory.util.LargeMemoryUtil;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.SchemaUtility;
+
+/**
+ * Native implementation of {@link Scanner}. Note that it currently emits only a single scan task of type
+ * {@link NativeScanTask}, which is internally a combination of all scan task instances returned by the
+ * native scanner.
+ */
+public class NativeScanner implements Scanner {
+
+ private final AtomicBoolean executed = new AtomicBoolean(false);
+ private final NativeContext context;
+ private final long scannerId;
+
+ private final ReadWriteLock lock = new ReentrantReadWriteLock();
+ private final Lock writeLock = lock.writeLock();
+ private final Lock readLock = lock.readLock();
+ private boolean closed = false;
+
+ public NativeScanner(NativeContext context, long scannerId) {
+ this.context = context;
+ this.scannerId = scannerId;
+ }
+
+ ScanTask.BatchIterator execute() {
+ if (closed) {
+ throw new NativeInstanceReleasedException();
+ }
+ if (!executed.compareAndSet(false, true)) {
+ throw new UnsupportedOperationException("NativeScanner cannot be executed more than once. Consider creating " +
+ "new scanner instead");
+ }
+ return new ScanTask.BatchIterator() {
+ private ArrowRecordBatch peek = null;
+
+ @Override
+ public void close() {
+ NativeScanner.this.close();
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (peek != null) {
+ return true;
+ }
+ final NativeRecordBatchHandle handle;
+ readLock.lock();
+ try {
+ if (closed) {
+ throw new NativeInstanceReleasedException();
+ }
+ handle = JniWrapper.get().nextRecordBatch(scannerId);
+ } finally {
+ readLock.unlock();
+ }
+ if (handle == null) {
+ return false;
+ }
+ final ArrayList<ArrowBuf> buffers = new ArrayList<>();
+ for (NativeRecordBatchHandle.Buffer buffer : handle.getBuffers()) {
+ final BufferAllocator allocator = context.getAllocator();
+ final int size = LargeMemoryUtil.checkedCastToInt(buffer.size);
+ final NativeUnderlyingMemory am = NativeUnderlyingMemory.create(allocator,
+ size, buffer.nativeInstanceId, buffer.memoryAddress);
+ BufferLedger ledger = am.associate(allocator);
+ ArrowBuf buf = new ArrowBuf(ledger, null, size, buffer.memoryAddress);
+ buffers.add(buf);
+ }
+
+ try {
+ final int numRows = LargeMemoryUtil.checkedCastToInt(handle.getNumRows());
+ peek = new ArrowRecordBatch(numRows, handle.getFields().stream()
+ .map(field -> new ArrowFieldNode(field.length, field.nullCount))
+ .collect(Collectors.toList()), buffers);
+ return true;
+ } finally {
+ buffers.forEach(buffer -> buffer.getReferenceManager().release());
+ }
+ }
+
+ @Override
+ public ArrowRecordBatch next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException();
+ }
+
+ try {
+ return peek;
+ } finally {
+ peek = null;
+ }
+ }
+ };
+ }
+
+ @Override
+ public Iterable<? extends NativeScanTask> scan() {
+ if (closed) {
+ throw new NativeInstanceReleasedException();
+ }
+ return Collections.singletonList(new NativeScanTask(this));
+ }
+
+ @Override
+ public Schema schema() {
+ readLock.lock();
+ try {
+ if (closed) {
+ throw new NativeInstanceReleasedException();
+ }
+ return SchemaUtility.deserialize(JniWrapper.get().getSchemaFromScanner(scannerId), context.getAllocator());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ } finally {
+ readLock.unlock();
+ }
+ }
+
+ @Override
+ public void close() {
+ writeLock.lock();
+ try {
+ if (closed) {
+ return;
+ }
+ closed = true;
+ JniWrapper.get().closeScanner(scannerId);
+ } finally {
+ writeLock.unlock();
+ }
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java
new file mode 100644
index 000000000..f1ffdd2ac
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+/**
+ * Listener of buffer memory reservation. Used by native datasets.
+ */
+public interface ReservationListener {
+
+ /**
+ * Reserve bytes.
+ *
+ * @throws RuntimeException if request size cannot be granted
+ */
+ void reserve(long size);
+
+ /**
+ * Unreserve bytes.
+ */
+ void unreserve(long size);
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java
new file mode 100644
index 000000000..f5a1af384
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.scanner;
+
+import java.util.Optional;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Options used during scanning.
+ */
+public class ScanOptions {
+ private final Optional<String[]> columns;
+ private final long batchSize;
+
+ /**
+ * Constructor.
+ * @param columns Projected columns. Empty for scanning all columns.
+ * @param batchSize Maximum row number of each returned {@link org.apache.arrow.vector.ipc.message.ArrowRecordBatch}
+ *
+ * @deprecated Deprecated. Use {@link #ScanOptions(long, Optional)} instead.
+ */
+ @Deprecated
+ public ScanOptions(String[] columns, long batchSize) {
+ this(batchSize, Optional.of(columns).map(present -> {
+ if (present.length == 0) {
+ // Backwards compatibility: See ARROW-13257, in the new constructor, we now use null to scan for all columns.
+ return null;
+ }
+ return present;
+ }));
+ }
+
+ /**
+ * Constructor.
+ * @param batchSize Maximum row number of each returned {@link org.apache.arrow.vector.ipc.message.ArrowRecordBatch}
+ * @param columns (Optional) Projected columns. {@link Optional#empty()} for scanning all columns. Otherwise,
+ * Only columns present in the Array will be scanned.
+ */
+ public ScanOptions(long batchSize, Optional<String[]> columns) {
+ Preconditions.checkNotNull(columns);
+ this.batchSize = batchSize;
+ this.columns = columns;
+ }
+
+ public ScanOptions(long batchSize) {
+ this(batchSize, Optional.empty());
+ }
+
+ public Optional<String[]> getColumns() {
+ return columns;
+ }
+
+ public long getBatchSize() {
+ return batchSize;
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanTask.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanTask.java
new file mode 100644
index 000000000..d07036a61
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanTask.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.scanner;
+
+import java.util.Iterator;
+
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+
+/**
+ * Read record batches from a range of a single data fragment. A
+ * ScanTask is meant to be a unit of work to be dispatched. The implementation
+ * must be thread and concurrent safe.
+ */
+public interface ScanTask extends AutoCloseable {
+
+ /**
+ * Creates and returns a {@link BatchIterator} instance.
+ */
+ BatchIterator execute();
+
+ /**
+ * The iterator implementation for {@link org.apache.arrow.vector.ipc.message.ArrowRecordBatch}s.
+ */
+ interface BatchIterator extends Iterator<ArrowRecordBatch>, AutoCloseable {
+
+ }
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/Scanner.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/Scanner.java
new file mode 100644
index 000000000..93a1b08f3
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/Scanner.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.scanner;
+
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * A high level interface for scanning data over dataset.
+ */
+public interface Scanner extends AutoCloseable {
+
+ /**
+ * Perform the scan operation.
+ *
+ * @return a iterable set of {@link ScanTask}s. Each task is considered independent and it is allowed
+ * to execute the tasks concurrently to gain better performance.
+ */
+ Iterable<? extends ScanTask> scan();
+
+ /**
+ * Get the schema of this Scanner.
+ *
+ * @return the schema instance
+ */
+ Schema schema();
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/source/Dataset.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/source/Dataset.java
new file mode 100644
index 000000000..ce193581f
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/source/Dataset.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.source;
+
+import org.apache.arrow.dataset.scanner.ScanOptions;
+import org.apache.arrow.dataset.scanner.Scanner;
+
+/**
+ * A container of Fragments which are the internal iterable unit of read data.
+ */
+public interface Dataset extends AutoCloseable {
+
+ /**
+ * Create a new Scanner using the provided scan options.
+ *
+ * @param options options used during creating Scanner
+ * @return the Scanner instance
+ */
+ Scanner newScan(ScanOptions options);
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/source/DatasetFactory.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/source/DatasetFactory.java
new file mode 100644
index 000000000..46b8545d6
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/dataset/source/DatasetFactory.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.source;
+
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * DatasetFactory provides a way to inspect a Dataset potential
+ * schema before materializing it. Thus, the user can peek the schema for
+ * data sources and decide on a unified schema.
+ */
+public interface DatasetFactory extends AutoCloseable {
+
+ /**
+ * Get unified schema for the resulting Dataset.
+ *
+ * @return the schema object inspected
+ */
+ Schema inspect();
+
+ /**
+ * Create a Dataset with auto-inferred schema. Which means, the schema of the resulting Dataset will be
+ * the same with calling {@link #inspect()} manually.
+ *
+ * @return the Dataset instance
+ */
+ Dataset finish();
+
+ /**
+ * Create a Dataset with predefined schema. Schema inference will not be performed.
+ *
+ * @param schema a predefined schema
+ * @return the Dataset instance
+ */
+ Dataset finish(Schema schema);
+}
diff --git a/src/arrow/java/dataset/src/main/java/org/apache/arrow/memory/NativeUnderlyingMemory.java b/src/arrow/java/dataset/src/main/java/org/apache/arrow/memory/NativeUnderlyingMemory.java
new file mode 100644
index 000000000..963fb6170
--- /dev/null
+++ b/src/arrow/java/dataset/src/main/java/org/apache/arrow/memory/NativeUnderlyingMemory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import org.apache.arrow.dataset.jni.JniWrapper;
+
+/**
+ * AllocationManager implementation for native allocated memory.
+ */
+public class NativeUnderlyingMemory extends AllocationManager {
+
+ private final int size;
+ private final long nativeInstanceId;
+ private final long address;
+
+ /**
+ * Constructor.
+ *
+ * @param accountingAllocator The accounting allocator instance
+ * @param size Size of underlying memory (in bytes)
+ * @param nativeInstanceId ID of the native instance
+ */
+ NativeUnderlyingMemory(BufferAllocator accountingAllocator, int size, long nativeInstanceId, long address) {
+ super(accountingAllocator);
+ this.size = size;
+ this.nativeInstanceId = nativeInstanceId;
+ this.address = address;
+ // pre-allocate bytes on accounting allocator
+ final AllocationListener listener = accountingAllocator.getListener();
+ try (final AllocationReservation reservation = accountingAllocator.newReservation()) {
+ listener.onPreAllocation(size);
+ reservation.reserve(size);
+ listener.onAllocation(size);
+ } catch (Exception e) {
+ release0();
+ throw e;
+ }
+ }
+
+ /**
+ * Alias to constructor.
+ */
+ public static NativeUnderlyingMemory create(BufferAllocator bufferAllocator, int size, long nativeInstanceId,
+ long address) {
+ return new NativeUnderlyingMemory(bufferAllocator, size, nativeInstanceId, address);
+ }
+
+ public BufferLedger associate(BufferAllocator allocator) {
+ return super.associate(allocator);
+ }
+
+ @Override
+ protected void release0() {
+ JniWrapper.get().releaseBuffer(nativeInstanceId);
+ }
+
+ @Override
+ public long getSize() {
+ return size;
+ }
+
+ @Override
+ protected long memoryAddress() {
+ return address;
+ }
+}
diff --git a/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/ParquetWriteSupport.java b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/ParquetWriteSupport.java
new file mode 100644
index 000000000..c6299d135
--- /dev/null
+++ b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/ParquetWriteSupport.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset;
+
+import java.io.File;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.parquet.avro.AvroParquetWriter;
+import org.apache.parquet.hadoop.ParquetWriter;
+
+/**
+ * Utility class for writing Parquet files using Avro based tools.
+ */
+public class ParquetWriteSupport implements AutoCloseable {
+
+ private final String path;
+ private final String uri;
+ private final ParquetWriter<GenericRecord> writer;
+ private final Schema avroSchema;
+ private final List<GenericRecord> writtenRecords = new ArrayList<>();
+ private final GenericRecordListBuilder recordListBuilder = new GenericRecordListBuilder();
+
+
+ public ParquetWriteSupport(String schemaName, File outputFolder) throws Exception {
+ avroSchema = readSchemaFromFile(schemaName);
+ path = outputFolder.getPath() + File.separator + "generated.parquet";
+ uri = "file://" + path;
+ writer = AvroParquetWriter.<GenericRecord>builder(new org.apache.hadoop.fs.Path(path))
+ .withSchema(avroSchema)
+ .build();
+ }
+
+ private static Schema readSchemaFromFile(String schemaName) throws Exception {
+ Path schemaPath = Paths.get(ParquetWriteSupport.class.getResource("/").getPath(),
+ "avroschema", schemaName);
+ return new org.apache.avro.Schema.Parser().parse(schemaPath.toFile());
+ }
+
+ public static ParquetWriteSupport writeTempFile(String schemaName, File outputFolder,
+ Object... values) throws Exception {
+ try (final ParquetWriteSupport writeSupport = new ParquetWriteSupport(schemaName, outputFolder)) {
+ writeSupport.writeRecords(values);
+ return writeSupport;
+ }
+ }
+
+ public void writeRecords(Object... values) throws Exception {
+ final List<GenericRecord> valueList = getRecordListBuilder().createRecordList(values);
+ writeRecords(valueList);
+ }
+
+ public void writeRecords(List<GenericRecord> records) throws Exception {
+ for (GenericRecord record : records) {
+ writeRecord(record);
+ }
+ }
+
+ public void writeRecord(GenericRecord record) throws Exception {
+ writtenRecords.add(record);
+ writer.write(record);
+ }
+
+ public String getOutputURI() {
+ return uri;
+ }
+
+ public Schema getAvroSchema() {
+ return avroSchema;
+ }
+
+ public GenericRecordListBuilder getRecordListBuilder() {
+ return recordListBuilder;
+ }
+
+ public List<GenericRecord> getWrittenRecords() {
+ return Collections.unmodifiableList(writtenRecords);
+ }
+
+ @Override
+ public void close() throws Exception {
+ writer.close();
+ }
+
+ public class GenericRecordListBuilder {
+ public final List<GenericRecord> createRecordList(Object... values) {
+ final int fieldCount = avroSchema.getFields().size();
+ Preconditions.checkArgument(values.length % fieldCount == 0,
+ "arg count of values should be divide by field number");
+ final List<GenericRecord> recordList = new ArrayList<>();
+ for (int i = 0; i < values.length / fieldCount; i++) {
+ final GenericRecord record = new GenericData.Record(avroSchema);
+ for (int j = 0; j < fieldCount; j++) {
+ record.put(j, values[i * fieldCount + j]);
+ }
+ recordList.add(record);
+ }
+ return Collections.unmodifiableList(recordList);
+ }
+ }
+}
diff --git a/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/TestDataset.java b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/TestDataset.java
new file mode 100644
index 000000000..51dac15e5
--- /dev/null
+++ b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/TestDataset.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.apache.arrow.dataset.scanner.ScanOptions;
+import org.apache.arrow.dataset.scanner.Scanner;
+import org.apache.arrow.dataset.source.Dataset;
+import org.apache.arrow.dataset.source.DatasetFactory;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+
+public abstract class TestDataset {
+ private RootAllocator allocator = null;
+
+ @Before
+ public void setUp() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void tearDown() {
+ allocator.close();
+ }
+
+ protected RootAllocator rootAllocator() {
+ return allocator;
+ }
+
+ protected List<ArrowRecordBatch> collectResultFromFactory(DatasetFactory factory, ScanOptions options) {
+ final Dataset dataset = factory.finish();
+ final Scanner scanner = dataset.newScan(options);
+ final List<ArrowRecordBatch> ret = stream(scanner.scan())
+ .flatMap(t -> stream(t.execute()))
+ .collect(Collectors.toList());
+ try {
+ AutoCloseables.close(scanner, dataset);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ return ret;
+ }
+
+ protected Schema inferResultSchemaFromFactory(DatasetFactory factory, ScanOptions options) {
+ final Dataset dataset = factory.finish();
+ final Scanner scanner = dataset.newScan(options);
+ final Schema schema = scanner.schema();
+ try {
+ AutoCloseables.close(scanner, dataset);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ return schema;
+ }
+
+ protected <T> Stream<T> stream(Iterable<T> iterable) {
+ return StreamSupport.stream(iterable.spliterator(), false);
+ }
+
+ protected <T> List<T> collect(Iterable<T> iterable) {
+ return stream(iterable).collect(Collectors.toList());
+ }
+
+ protected <T> Stream<T> stream(Iterator<T> iterator) {
+ return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false);
+ }
+
+ protected <T> List<T> collect(Iterator<T> iterator) {
+ return stream(iterator).collect(Collectors.toList());
+ }
+}
diff --git a/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java
new file mode 100644
index 000000000..2b99f8283
--- /dev/null
+++ b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java
@@ -0,0 +1,338 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.file;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.dataset.ParquetWriteSupport;
+import org.apache.arrow.dataset.jni.NativeDataset;
+import org.apache.arrow.dataset.jni.NativeInstanceReleasedException;
+import org.apache.arrow.dataset.jni.NativeMemoryPool;
+import org.apache.arrow.dataset.jni.NativeScanTask;
+import org.apache.arrow.dataset.jni.NativeScanner;
+import org.apache.arrow.dataset.jni.TestNativeDataset;
+import org.apache.arrow.dataset.scanner.ScanOptions;
+import org.apache.arrow.dataset.scanner.ScanTask;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.GenericRecordBuilder;
+import org.junit.Assert;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.rules.TemporaryFolder;
+
+public class TestFileSystemDataset extends TestNativeDataset {
+
+ @ClassRule
+ public static final TemporaryFolder TMP = new TemporaryFolder();
+
+ public static final String AVRO_SCHEMA_USER = "user.avsc";
+
+ @Test
+ public void testBaseParquetRead() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ ScanOptions options = new ScanOptions(100);
+ Schema schema = inferResultSchemaFromFactory(factory, options);
+ List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);
+
+ assertSingleTaskProduced(factory, options);
+ assertEquals(1, datum.size());
+ assertEquals(2, schema.getFields().size());
+ assertEquals("id", schema.getFields().get(0).getName());
+ assertEquals("name", schema.getFields().get(1).getName());
+ assertEquals(Types.MinorType.INT.getType(), schema.getFields().get(0).getType());
+ assertEquals(Types.MinorType.VARCHAR.getType(), schema.getFields().get(1).getType());
+ checkParquetReadResult(schema, writeSupport.getWrittenRecords(), datum);
+
+ AutoCloseables.close(datum);
+ }
+
+ @Test
+ public void testParquetProjectSingleColumn() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ ScanOptions options = new ScanOptions(100, Optional.of(new String[]{"id"}));
+ Schema schema = inferResultSchemaFromFactory(factory, options);
+ List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);
+ org.apache.avro.Schema expectedSchema = truncateAvroSchema(writeSupport.getAvroSchema(), 0, 1);
+
+ assertSingleTaskProduced(factory, options);
+ assertEquals(1, schema.getFields().size());
+ assertEquals("id", schema.getFields().get(0).getName());
+ assertEquals(Types.MinorType.INT.getType(), schema.getFields().get(0).getType());
+ assertEquals(1, datum.size());
+ checkParquetReadResult(schema,
+ Collections.singletonList(
+ new GenericRecordBuilder(
+ expectedSchema)
+ .set("id", 1)
+ .build()), datum);
+
+ AutoCloseables.close(datum);
+ }
+
+ @Test
+ public void testParquetBatchSize() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(),
+ 1, "a", 2, "b", 3, "c");
+
+ ScanOptions options = new ScanOptions(1);
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ Schema schema = inferResultSchemaFromFactory(factory, options);
+ List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);
+
+ assertSingleTaskProduced(factory, options);
+ assertEquals(3, datum.size());
+ datum.forEach(batch -> assertEquals(1, batch.getLength()));
+ checkParquetReadResult(schema, writeSupport.getWrittenRecords(), datum);
+
+ AutoCloseables.close(datum);
+ }
+
+ @Test
+ public void testEmptyProjectSelectsZeroColumns() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ ScanOptions options = new ScanOptions(100, Optional.of(new String[0]));
+ Schema schema = inferResultSchemaFromFactory(factory, options);
+ List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);
+ org.apache.avro.Schema expectedSchema = org.apache.avro.Schema.createRecord(Collections.emptyList());
+
+ assertSingleTaskProduced(factory, options);
+ assertEquals(0, schema.getFields().size());
+ assertEquals(1, datum.size());
+ checkParquetReadResult(schema,
+ Collections.singletonList(
+ new GenericRecordBuilder(
+ expectedSchema)
+ .build()), datum);
+
+ AutoCloseables.close(datum);
+ }
+
+ @Test
+ public void testNullProjectSelectsAllColumns() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ ScanOptions options = new ScanOptions(100, Optional.empty());
+ Schema schema = inferResultSchemaFromFactory(factory, options);
+ List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);
+
+ assertSingleTaskProduced(factory, options);
+ assertEquals(1, datum.size());
+ assertEquals(2, schema.getFields().size());
+ assertEquals("id", schema.getFields().get(0).getName());
+ assertEquals("name", schema.getFields().get(1).getName());
+ assertEquals(Types.MinorType.INT.getType(), schema.getFields().get(0).getType());
+ assertEquals(Types.MinorType.VARCHAR.getType(), schema.getFields().get(1).getType());
+ checkParquetReadResult(schema, writeSupport.getWrittenRecords(), datum);
+
+ AutoCloseables.close(datum);
+ }
+
+ @Test
+ public void testNoErrorWhenCloseAgain() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+
+ assertDoesNotThrow(() -> {
+ NativeDataset dataset = factory.finish();
+ dataset.close();
+ dataset.close();
+ });
+ }
+
+ @Test
+ public void testErrorThrownWhenScanAgain() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ NativeDataset dataset = factory.finish();
+ ScanOptions options = new ScanOptions(100);
+ NativeScanner scanner = dataset.newScan(options);
+ List<? extends NativeScanTask> taskList1 = collect(scanner.scan());
+ List<? extends NativeScanTask> taskList2 = collect(scanner.scan());
+ NativeScanTask task1 = taskList1.get(0);
+ NativeScanTask task2 = taskList2.get(0);
+ List<ArrowRecordBatch> datum = collect(task1.execute());
+
+ UnsupportedOperationException uoe = assertThrows(UnsupportedOperationException.class, task2::execute);
+ Assertions.assertEquals("NativeScanner cannot be executed more than once. Consider creating new scanner instead",
+ uoe.getMessage());
+
+ AutoCloseables.close(datum);
+ AutoCloseables.close(taskList1);
+ AutoCloseables.close(taskList2);
+ AutoCloseables.close(scanner, dataset, factory);
+ }
+
+ @Test
+ public void testScanInOtherThread() throws Exception {
+ ExecutorService executor = Executors.newSingleThreadExecutor();
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ NativeDataset dataset = factory.finish();
+ ScanOptions options = new ScanOptions(100);
+ NativeScanner scanner = dataset.newScan(options);
+ List<? extends NativeScanTask> taskList = collect(scanner.scan());
+ NativeScanTask task = taskList.get(0);
+ List<ArrowRecordBatch> datum = executor.submit(() -> collect(task.execute())).get();
+
+ AutoCloseables.close(datum);
+ AutoCloseables.close(taskList);
+ AutoCloseables.close(scanner, dataset, factory);
+ }
+
+ @Test
+ public void testErrorThrownWhenScanAfterScannerClose() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ NativeDataset dataset = factory.finish();
+ ScanOptions options = new ScanOptions(100);
+ NativeScanner scanner = dataset.newScan(options);
+ scanner.close();
+ assertThrows(NativeInstanceReleasedException.class, scanner::scan);
+ }
+
+ @Test
+ public void testErrorThrownWhenExecuteTaskAfterTaskClose() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ NativeDataset dataset = factory.finish();
+ ScanOptions options = new ScanOptions(100);
+ NativeScanner scanner = dataset.newScan(options);
+ List<? extends NativeScanTask> tasks = collect(scanner.scan());
+ NativeScanTask task = tasks.get(0);
+ task.close();
+ assertThrows(NativeInstanceReleasedException.class, task::execute);
+ }
+
+ @Test
+ public void testErrorThrownWhenIterateOnIteratorAfterTaskClose() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ NativeDataset dataset = factory.finish();
+ ScanOptions options = new ScanOptions(100);
+ NativeScanner scanner = dataset.newScan(options);
+ List<? extends NativeScanTask> tasks = collect(scanner.scan());
+ NativeScanTask task = tasks.get(0);
+ ScanTask.BatchIterator iterator = task.execute();
+ task.close();
+ assertThrows(NativeInstanceReleasedException.class, iterator::hasNext);
+ }
+
+ @Test
+ public void testMemoryAllocationOnAssociatedAllocator() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
+ FileFormat.PARQUET, writeSupport.getOutputURI());
+ ScanOptions options = new ScanOptions(100);
+ long initReservation = rootAllocator().getAllocatedMemory();
+ List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);
+ final long expected_diff = datum.stream()
+ .flatMapToLong(batch -> batch.getBuffers()
+ .stream()
+ .mapToLong(buf -> buf.getReferenceManager().getAccountedSize())).sum();
+ long reservation = rootAllocator().getAllocatedMemory();
+ AutoCloseables.close(datum);
+ long finalReservation = rootAllocator().getAllocatedMemory();
+ Assert.assertEquals(expected_diff, reservation - initReservation);
+ Assert.assertEquals(-expected_diff, finalReservation - reservation);
+ }
+
+ private void checkParquetReadResult(Schema schema, List<GenericRecord> expected, List<ArrowRecordBatch> actual) {
+ assertEquals(expected.size(), actual.stream()
+ .mapToInt(ArrowRecordBatch::getLength)
+ .sum());
+ final int fieldCount = schema.getFields().size();
+ LinkedList<GenericRecord> expectedRemovable = new LinkedList<>(expected);
+ try (VectorSchemaRoot vsr = VectorSchemaRoot.create(schema, rootAllocator())) {
+ VectorLoader loader = new VectorLoader(vsr);
+ for (ArrowRecordBatch batch : actual) {
+ try {
+ assertEquals(fieldCount, batch.getNodes().size());
+ loader.load(batch);
+ int batchRowCount = vsr.getRowCount();
+ for (int i = 0; i < fieldCount; i++) {
+ FieldVector vector = vsr.getVector(i);
+ for (int j = 0; j < batchRowCount; j++) {
+ Object object = vector.getObject(j);
+ Object expectedObject = expectedRemovable.get(j).get(i);
+ assertEquals(Objects.toString(expectedObject),
+ Objects.toString(object));
+ }
+ }
+ for (int i = 0; i < batchRowCount; i++) {
+ expectedRemovable.poll();
+ }
+ } finally {
+ batch.close();
+ }
+ }
+ assertTrue(expectedRemovable.isEmpty());
+ }
+ }
+
+ private org.apache.avro.Schema truncateAvroSchema(org.apache.avro.Schema schema, int from, int to) {
+ List<org.apache.avro.Schema.Field> fields = schema.getFields().subList(from, to);
+ return org.apache.avro.Schema.createRecord(
+ fields.stream()
+ .map(f -> new org.apache.avro.Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal(), f.order()))
+ .collect(Collectors.toList()));
+ }
+}
diff --git a/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDatasetFactory.java b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDatasetFactory.java
new file mode 100644
index 000000000..bddf96b5e
--- /dev/null
+++ b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDatasetFactory.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.file;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.apache.arrow.dataset.jni.NativeMemoryPool;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Test;
+
+public class TestFileSystemDatasetFactory {
+
+ @Test
+ public void testErrorHandling() {
+ RuntimeException e = assertThrows(RuntimeException.class, () -> {
+ new FileSystemDatasetFactory(new RootAllocator(Long.MAX_VALUE), NativeMemoryPool.getDefault(),
+ FileFormat.NONE, "file:///NON_EXIST_FILE");
+ });
+ assertEquals("illegal file format id: -1", e.getMessage());
+ }
+
+ @Test
+ public void testCloseAgain() {
+ assertDoesNotThrow(() -> {
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(new RootAllocator(Long.MAX_VALUE),
+ NativeMemoryPool.getDefault(), FileFormat.PARQUET, "file:///NON_EXIST_FILE");
+ factory.close();
+ factory.close();
+ });
+ }
+}
diff --git a/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestNativeDataset.java b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestNativeDataset.java
new file mode 100644
index 000000000..2a86a2568
--- /dev/null
+++ b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestNativeDataset.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import org.apache.arrow.dataset.TestDataset;
+import org.apache.arrow.dataset.scanner.ScanOptions;
+import org.apache.arrow.dataset.scanner.Scanner;
+import org.apache.arrow.dataset.source.Dataset;
+import org.apache.arrow.dataset.source.DatasetFactory;
+import org.junit.Assert;
+
+public abstract class TestNativeDataset extends TestDataset {
+ protected void assertSingleTaskProduced(DatasetFactory factory, ScanOptions options) {
+ final Dataset dataset = factory.finish();
+ final Scanner scanner = dataset.newScan(options);
+ Assert.assertEquals(1L, stream(scanner.scan()).count());
+ }
+}
diff --git a/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestReservationListener.java b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestReservationListener.java
new file mode 100644
index 000000000..2bc1b9a41
--- /dev/null
+++ b/src/arrow/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestReservationListener.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.dataset.jni;
+
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.arrow.dataset.ParquetWriteSupport;
+import org.apache.arrow.dataset.TestDataset;
+import org.apache.arrow.dataset.file.FileFormat;
+import org.apache.arrow.dataset.file.FileSystemDatasetFactory;
+import org.apache.arrow.dataset.scanner.ScanOptions;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.junit.Assert;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class TestReservationListener extends TestDataset {
+
+ @ClassRule
+ public static final TemporaryFolder TMP = new TemporaryFolder();
+
+ public static final String AVRO_SCHEMA_USER = "user.avsc";
+
+ @Test
+ public void testDirectReservationListener() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+ NativeMemoryPool pool = NativeMemoryPool.createListenable(DirectReservationListener.instance());
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(),
+ pool, FileFormat.PARQUET,
+ writeSupport.getOutputURI());
+ ScanOptions options = new ScanOptions(100);
+ long initReservation = DirectReservationListener.instance().getCurrentDirectMemReservation();
+ List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);
+ long reservation = DirectReservationListener.instance().getCurrentDirectMemReservation();
+ AutoCloseables.close(datum);
+ AutoCloseables.close(pool);
+ long finalReservation = DirectReservationListener.instance().getCurrentDirectMemReservation();
+ Assert.assertTrue(reservation >= initReservation);
+ Assert.assertTrue(finalReservation == initReservation);
+ }
+
+ @Test
+ public void testCustomReservationListener() throws Exception {
+ ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a");
+ final AtomicLong reserved = new AtomicLong(0L);
+ ReservationListener listener = new ReservationListener() {
+ @Override
+ public void reserve(long size) {
+ reserved.getAndAdd(size);
+ }
+
+ @Override
+ public void unreserve(long size) {
+ reserved.getAndAdd(-size);
+ }
+ };
+ NativeMemoryPool pool = NativeMemoryPool.createListenable(listener);
+ FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(),
+ pool, FileFormat.PARQUET, writeSupport.getOutputURI());
+ ScanOptions options = new ScanOptions(100);
+ long initReservation = reserved.get();
+ List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);
+ long reservation = reserved.get();
+ AutoCloseables.close(datum);
+ AutoCloseables.close(pool);
+ long finalReservation = reserved.get();
+ Assert.assertTrue(reservation >= initReservation);
+ Assert.assertTrue(finalReservation == initReservation);
+ }
+}
diff --git a/src/arrow/java/dataset/src/test/java/org/apache/arrow/memory/TestNativeUnderlyingMemory.java b/src/arrow/java/dataset/src/test/java/org/apache/arrow/memory/TestNativeUnderlyingMemory.java
new file mode 100644
index 000000000..c81868e42
--- /dev/null
+++ b/src/arrow/java/dataset/src/test/java/org/apache/arrow/memory/TestNativeUnderlyingMemory.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.*;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestNativeUnderlyingMemory {
+
+ private RootAllocator allocator = null;
+
+ @Before
+ public void setUp() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void tearDown() {
+ allocator.close();
+ }
+
+ protected RootAllocator rootAllocator() {
+ return allocator;
+ }
+
+ @Test
+ public void testReservation() {
+ final RootAllocator root = rootAllocator();
+
+ final int size = 512;
+ final AllocationManager am = new MockUnderlyingMemory(root, size);
+ final BufferLedger ledger = am.associate(root);
+
+ assertEquals(size, root.getAllocatedMemory());
+
+ ledger.release();
+ }
+
+ @Test
+ public void testBufferTransfer() {
+ final RootAllocator root = rootAllocator();
+
+ ChildAllocator allocator1 = (ChildAllocator) root.newChildAllocator("allocator1", 0, Long.MAX_VALUE);
+ ChildAllocator allocator2 = (ChildAllocator) root.newChildAllocator("allocator2", 0, Long.MAX_VALUE);
+ assertEquals(0, allocator1.getAllocatedMemory());
+ assertEquals(0, allocator2.getAllocatedMemory());
+
+ final int size = 512;
+ final AllocationManager am = new MockUnderlyingMemory(allocator1, size);
+
+ final BufferLedger owningLedger = am.associate(allocator1);
+ assertEquals(size, owningLedger.getAccountedSize());
+ assertEquals(size, owningLedger.getSize());
+ assertEquals(size, allocator1.getAllocatedMemory());
+
+ final BufferLedger transferredLedger = am.associate(allocator2);
+ owningLedger.release(); // release previous owner
+ assertEquals(0, owningLedger.getAccountedSize());
+ assertEquals(size, owningLedger.getSize());
+ assertEquals(size, transferredLedger.getAccountedSize());
+ assertEquals(size, transferredLedger.getSize());
+ assertEquals(0, allocator1.getAllocatedMemory());
+ assertEquals(size, allocator2.getAllocatedMemory());
+
+ transferredLedger.release();
+ allocator1.close();
+ allocator2.close();
+ }
+
+ /**
+ * A mock class of {@link NativeUnderlyingMemory} for unit testing about size-related operations.
+ */
+ private static class MockUnderlyingMemory extends NativeUnderlyingMemory {
+
+ /**
+ * Constructor.
+ */
+ MockUnderlyingMemory(BaseAllocator accountingAllocator, int size) {
+ super(accountingAllocator, size, -1L, -1L);
+ }
+
+ @Override
+ protected void release0() {
+ System.out.println("Underlying memory released. Size: " + getSize());
+ }
+
+ @Override
+ protected long memoryAddress() {
+ throw new UnsupportedOperationException();
+ }
+ }
+}
diff --git a/src/arrow/java/dataset/src/test/resources/avroschema/user.avsc b/src/arrow/java/dataset/src/test/resources/avroschema/user.avsc
new file mode 100644
index 000000000..072b64391
--- /dev/null
+++ b/src/arrow/java/dataset/src/test/resources/avroschema/user.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.dataset",
+ "type": "record",
+ "name": "User",
+ "fields": [
+ {"name": "id", "type": ["int", "null"]},
+ {"name": "name", "type": ["string", "null"]}
+ ]
+}
diff --git a/src/arrow/java/dev/checkstyle/checkstyle.license b/src/arrow/java/dev/checkstyle/checkstyle.license
new file mode 100644
index 000000000..3e7c6c26f
--- /dev/null
+++ b/src/arrow/java/dev/checkstyle/checkstyle.license
@@ -0,0 +1,16 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
diff --git a/src/arrow/java/dev/checkstyle/checkstyle.xml b/src/arrow/java/dev/checkstyle/checkstyle.xml
new file mode 100644
index 000000000..c27f382dd
--- /dev/null
+++ b/src/arrow/java/dev/checkstyle/checkstyle.xml
@@ -0,0 +1,280 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<!DOCTYPE module PUBLIC
+ "-//Checkstyle//DTD Checkstyle Configuration 1.3//EN"
+ "https://checkstyle.org/dtds/configuration_1_3.dtd">
+
+<!--
+ Checkstyle configuration that checks the Google coding conventions from Google Java Style
+ that can be found at https://google.github.io/styleguide/javaguide.html.
+
+ Checkstyle is very configurable. Be sure to read the documentation at
+ http://checkstyle.sf.net (or in your downloaded distribution).
+
+ To completely disable a check, just comment it out or delete it from the file.
+
+ Authors: Max Vetrenko, Ruslan Diachenko, Roman Ivanov.
+ -->
+
+<module name = "Checker">
+ <property name="charset" value="UTF-8"/>
+
+ <property name="severity" value="warning"/>
+
+ <property name="fileExtensions" value="java, properties, xml"/>
+
+ <!-- Arrow specific modules -->
+
+ <module name="Header">
+ <property name="headerFile" value="${checkstyle.header.file}"/>
+ </module>
+
+ <module name="SuppressionFilter">
+ <property name="file" value="${checkstyle.suppressions.file}"/>
+ </module>
+
+ <module name="NewlineAtEndOfFile">
+ <property name="lineSeparator" value="lf" />
+ </module>
+
+ <!-- Google style modules -->
+
+ <!-- Checks for whitespace -->
+ <!-- See http://checkstyle.sf.net/config_whitespace.html -->
+ <module name="FileTabCharacter">
+ <property name="eachLine" value="true"/>
+ </module>
+
+ <module name="TreeWalker">
+ <module name="OuterTypeFilename"/>
+ <module name="IllegalTokenText">
+ <property name="tokens" value="STRING_LITERAL, CHAR_LITERAL"/>
+ <property name="format" value="\\u00(08|09|0(a|A)|0(c|C)|0(d|D)|22|27|5(C|c))|\\(0(10|11|12|14|15|42|47)|134)"/>
+ <property name="message" value="Avoid using corresponding octal or Unicode escape."/>
+ </module>
+ <module name="AvoidEscapedUnicodeCharacters">
+ <property name="allowEscapesForControlCharacters" value="true"/>
+ <property name="allowByTailComment" value="true"/>
+ <property name="allowNonPrintableEscapes" value="true"/>
+ </module>
+ <module name="LineLength">
+ <property name="max" value="120"/>
+ <property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/>
+ </module>
+ <module name="OneTopLevelClass"/>
+ <module name="NoLineWrap"/>
+ <module name="EmptyBlock">
+ <property name="option" value="TEXT"/>
+ <property name="tokens" value="LITERAL_TRY, LITERAL_FINALLY, LITERAL_IF, LITERAL_ELSE, LITERAL_SWITCH"/>
+ </module>
+ <module name="NeedBraces"/>
+ <module name="RightCurly"/>
+ <module name="RightCurly">
+ <property name="option" value="alone"/>
+ <property name="tokens" value="CLASS_DEF, METHOD_DEF, CTOR_DEF, LITERAL_FOR, LITERAL_WHILE, LITERAL_DO, STATIC_INIT, INSTANCE_INIT"/>
+ </module>
+ <module name="WhitespaceAround">
+ <property name="allowEmptyConstructors" value="true"/>
+ <property name="allowEmptyMethods" value="true"/>
+ <property name="allowEmptyTypes" value="true"/>
+ <property name="allowEmptyLoops" value="true"/>
+ <message key="ws.notFollowed"
+ value="WhitespaceAround: ''{0}'' is not followed by whitespace. Empty blocks may only be represented as '{}' when not part of a multi-block statement (4.1.3)"/>
+ <message key="ws.notPreceded"
+ value="WhitespaceAround: ''{0}'' is not preceded with whitespace."/>
+ </module>
+ <module name="WhitespaceAfter">
+ <property name="tokens" value="COMMA, SEMI, TYPECAST"/>
+ </module>
+ <module name="OneStatementPerLine"/>
+ <module name="MultipleVariableDeclarations"/>
+ <module name="ArrayTypeStyle"/>
+ <module name="MissingSwitchDefault"/>
+ <module name="FallThrough"/>
+ <module name="UpperEll"/>
+ <module name="ModifierOrder"/>
+ <module name="EmptyLineSeparator">
+ <property name="allowNoEmptyLineBetweenFields" value="true"/>
+ </module>
+ <module name="SeparatorWrap">
+ <property name="tokens" value="DOT"/>
+ <property name="option" value="nl"/>
+ </module>
+ <module name="SeparatorWrap">
+ <property name="tokens" value="COMMA"/>
+ <property name="option" value="EOL"/>
+ </module>
+ <module name="PackageName">
+ <property name="format" value="^[a-z]+(\.[a-z][a-z0-9]*)*$"/>
+ <message key="name.invalidPattern"
+ value="Package name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="TypeName">
+ <message key="name.invalidPattern"
+ value="Type name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="MemberName">
+ <property name="format" value="^[a-z][a-zA-Z0-9]*|[A-Z][A-Z0-9]*(_[A-Z0-9]+)*$"/>
+ <message key="name.invalidPattern"
+ value="Member name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="ParameterName">
+ <property name="format" value="^[a-z][a-zA-Z0-9]*$"/>
+ <message key="name.invalidPattern"
+ value="Parameter name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="CatchParameterName">
+ <property name="format" value="^(e|t|ex|[a-z]e|[a-z][a-z][a-zA-Z]+)$"/>
+ <message key="name.invalidPattern"
+ value="Catch parameter name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="LocalVariableName">
+ <property name="tokens" value="VARIABLE_DEF"/>
+ <property name="format" value="^[a-z][a-zA-Z0-9]*$"/>
+ <property name="allowOneCharVarInForLoop" value="true"/>
+ <message key="name.invalidPattern"
+ value="Local variable name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="ClassTypeParameterName">
+ <property name="format" value="(^[A-Z]+[0-9]?)$|([A-Z][a-zA-Z0-9]*[T]$)"/>
+ <message key="name.invalidPattern"
+ value="Class type name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="MethodTypeParameterName">
+ <property name="format" value="(^[A-Z]+[0-9]?)$|([A-Z][a-zA-Z0-9]*[T]$)"/>
+ <message key="name.invalidPattern"
+ value="Method type name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="InterfaceTypeParameterName">
+ <property name="format" value="(^[A-Z]+[0-9]?)$|([A-Z][a-zA-Z0-9]*[T]$)"/>
+ <message key="name.invalidPattern"
+ value="Interface type name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="NoFinalizer"/>
+ <module name="GenericWhitespace">
+ <message key="ws.followed"
+ value="GenericWhitespace ''{0}'' is followed by whitespace."/>
+ <message key="ws.preceded"
+ value="GenericWhitespace ''{0}'' is preceded with whitespace."/>
+ <message key="ws.illegalFollow"
+ value="GenericWhitespace ''{0}'' should followed by whitespace."/>
+ <message key="ws.notPreceded"
+ value="GenericWhitespace ''{0}'' is not preceded with whitespace."/>
+ </module>
+ <module name="Indentation">
+ <property name="basicOffset" value="2"/>
+ <property name="braceAdjustment" value="0"/>
+ <property name="caseIndent" value="2"/>
+ <property name="throwsIndent" value="4"/>
+ <property name="lineWrappingIndentation" value="4"/>
+ <property name="arrayInitIndent" value="2"/>
+ </module>
+ <module name="OverloadMethodsDeclarationOrder"/>
+ <module name="VariableDeclarationUsageDistance"/>
+ <module name="AvoidStarImport">
+ <property name="allowStaticMemberImports" value="true"/>
+ </module>
+ <module name="ImportOrder">
+ <property name="groups" value="java,javax,org,com"/>
+ <property name="ordered" value="true"/>
+ <property name="separated" value="true"/>
+ <property name="option" value="top"/>
+ <property name="sortStaticImportsAlphabetically" value="true"/>
+ </module>
+ <module name="SingleSpaceSeparator">
+ <property name="validateComments" value="true"/>
+ </module>
+ <module name="MethodParamPad"/>
+ <module name="OperatorWrap">
+ <property name="option" value="eol"/>
+ <property name="tokens" value="BAND, BOR, BSR, BXOR, DIV, EQUAL, GE, GT, LAND, LE, LITERAL_INSTANCEOF, LOR, LT, MINUS, MOD, NOT_EQUAL, PLUS, QUESTION, SL, SR, STAR "/>
+ </module>
+ <module name="AnnotationLocation">
+ <property name="tokens" value="CLASS_DEF, INTERFACE_DEF, ENUM_DEF, METHOD_DEF, CTOR_DEF"/>
+ </module>
+ <module name="AnnotationLocation">
+ <property name="tokens" value="VARIABLE_DEF"/>
+ <property name="allowSamelineMultipleAnnotations" value="true"/>
+ </module>
+ <module name="NonEmptyAtclauseDescription"/>
+ <module name="JavadocTagContinuationIndentation"/>
+ <module name="SummaryJavadoc">
+ <property name="forbiddenSummaryFragments" value="^@return the *|^This method returns |^A [{]@code [a-zA-Z0-9]+[}]( is a )"/>
+ </module>
+ <module name="AtclauseOrder">
+ <property name="tagOrder" value="@param, @return, @throws, @deprecated"/>
+ <property name="target" value="CLASS_DEF, INTERFACE_DEF, ENUM_DEF, METHOD_DEF, CTOR_DEF, VARIABLE_DEF"/>
+ </module>
+ <module name="JavadocMethod">
+ <property name="scope" value="public"/>
+ <property name="allowMissingParamTags" value="true"/>
+ <property name="allowMissingThrowsTags" value="true"/>
+ <property name="allowMissingReturnTag" value="true"/>
+ <property name="minLineCount" value="2"/>
+ <property name="allowedAnnotations" value="Override, Test"/>
+ <property name="allowThrowsTagsForSubclasses" value="true"/>
+ <!-- This seems partially broken under JDK >= 9. -->
+ <property name="suppressLoadErrors" value="true"/>
+ <property name="ignoreMethodNamesRegex" value="main"/>
+ </module>
+ <module name="JavadocType">
+ <property name="scope" value="public"/>
+ </module>
+ <module name="JavadocType">
+ <property name="scope" value="protected"/>
+ </module>
+ <module name="JavadocType">
+ <property name="scope" value="package"/>
+ </module>
+ <module name="MethodName">
+ <property name="format" value="^[a-z][a-z0-9][a-zA-Z0-9_]*$"/>
+ <message key="name.invalidPattern"
+ value="Method name ''{0}'' must match pattern ''{1}''."/>
+ </module>
+ <module name="SingleLineJavadoc">
+ <property name="ignoreInlineTags" value="false"/>
+ </module>
+ <module name="EmptyCatchBlock">
+ <property name="exceptionVariableName" value="expected|ignore"/>
+ </module>
+ <module name="CommentsIndentation"/>
+ <module name="UnusedImports">
+ <property name="severity" value="error"/>
+ <property name="processJavadoc" value="true"/>
+ <message key="import.unused"
+ value="Unused import: {0}."/>
+ </module>
+ <module name="RedundantModifier">
+ <!-- Checks for redundant modifiers on various symbol definitions -->
+ <property name="tokens" value="VARIABLE_DEF, ANNOTATION_FIELD_DEF, INTERFACE_DEF, CLASS_DEF, ENUM_DEF, METHOD_DEF"/>
+ </module>
+
+ <!--
+ If you wish to turn off checking for a section of code, you can put a comment in the source
+ before and after the section, with the following syntax:
+ // checkstyle:off no.XXX (such as checkstyle.off: NoFinalizer)
+ ... // stuff that breaks the styles
+ // checkstyle:on
+ -->
+ <module name="SuppressionCommentFilter">
+ <property name="offCommentFormat" value="checkstyle:off: ([\w\|]+)"/>
+ <property name="onCommentFormat" value="checkstyle:on: ([\w\|]+)"/>
+ <property name="checkFormat" value="$1"/>
+ </module>
+ </module>
+</module>
diff --git a/src/arrow/java/dev/checkstyle/suppressions.xml b/src/arrow/java/dev/checkstyle/suppressions.xml
new file mode 100644
index 000000000..c3f61f46c
--- /dev/null
+++ b/src/arrow/java/dev/checkstyle/suppressions.xml
@@ -0,0 +1,42 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<!DOCTYPE suppressions PUBLIC
+ "-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN"
+ "https://checkstyle.org/dtds/suppressions_1_2.dtd">
+
+<suppressions>
+ <suppress checks="JavadocPackage" files=".*[\\/]src[\\/]test[\\/].*"/>
+ <suppress checks="JavadocPackage" files=".*[\\/]maven-archetypes[\\/].*"/>
+ <suppress checks="JavadocPackage" files=".*[\\/]examples[\\/].*"/>
+ <!-- Method javadoc not required in testing directories -->
+ <suppress checks="JavadocMethod" files=".*[\\/]src[\\/]test[\\/].*"/>
+ <!-- Class javadoc not required in testing directories -->
+ <suppress checks="JavadocType" files=".*[\\/]src[\\/]test[\\/].*"/>
+
+ <!-- suppress all checks in the generated directories -->
+ <suppress checks=".*" files=".+[\\/]generated[\\/].+\.java" />
+ <suppress checks=".*" files=".+[\\/]generated-sources[\\/].+\.java" />
+ <suppress checks=".*" files=".+[\\/]generated-test-sources[\\/].+\.java" />
+
+ <!-- suppress files that include additional lines in license -->
+ <suppress checks="Header" files="AutoCloseables.java|Collections2.java" />
+
+ <!-- Suppress certain checks requiring many code changes, that add little benefit -->
+ <suppress checks="NoFinalizer|OverloadMethodsDeclarationOrder|VariableDeclarationUsageDistance" files=".*" />
+
+</suppressions>
diff --git a/src/arrow/java/flight/flight-core/README.md b/src/arrow/java/flight/flight-core/README.md
new file mode 100644
index 000000000..37b41ede2
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/README.md
@@ -0,0 +1,95 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Arrow Flight Java Package
+
+Exposing Apache Arrow data on the wire.
+
+[Protocol Description Slides](https://www.slideshare.net/JacquesNadeau5/apache-arrow-flight-overview)
+
+[GRPC Protocol Definition](https://github.com/apache/arrow/blob/master/format/Flight.proto)
+
+## Example usage
+
+* Compile the java tree:
+
+ ```
+ cd java
+ mvn clean install -DskipTests
+ ```
+
+* Go Into the Flight tree
+
+ ```
+ cd flight/flight-core
+ ```
+
+
+* Start the ExampleFlightServer (supports get/put of streams and listing these streams)
+
+ ```
+ mvn exec:exec
+ ```
+
+* In new terminal, run the TestExampleServer to populate the server with example data
+
+ ```
+ cd arrow/java/flight/flight-core
+ mvn surefire:test -DdisableServer=true -Dtest=TestExampleServer
+ ```
+
+## Python Example Usage
+
+* Compile example python headers
+
+ ```
+ mkdir target/generated-python
+ pip install grpcio-tools # or conda install grpcio
+ python -m grpc_tools.protoc -I./src/main/protobuf/ --python_out=./target/generated-python --grpc_python_out=./target/generated-python ../../format/Flight.proto
+ ```
+
+* Connect to the Flight Service
+
+ ```
+ cd target/generated-python
+ python
+ ```
+
+
+ ```
+ import grpc
+ import flight_pb2
+ import flight_pb2_grpc as flightrpc
+ channel = grpc.insecure_channel('localhost:12233')
+ service = flightrpc.FlightServiceStub(channel)
+ ```
+
+* List the Flight from Python
+
+ ```
+ for f in service.ListFlights(flight_pb2.Criteria()): f
+ ```
+
+* Try to Drop
+
+ ```
+ action = flight_pb2.Action()
+ action.type="drop"
+ service.DoAction(action)
+ ```
diff --git a/src/arrow/java/flight/flight-core/pom.xml b/src/arrow/java/flight/flight-core/pom.xml
new file mode 100644
index 000000000..669c6b744
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/pom.xml
@@ -0,0 +1,392 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ <relativePath>../../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>flight-core</artifactId>
+ <name>Arrow Flight Core</name>
+ <description>(Experimental)An RPC mechanism for transferring ValueVectors.</description>
+ <packaging>jar</packaging>
+
+ <properties>
+ <dep.grpc.version>1.41.0</dep.grpc.version>
+ <dep.protobuf.version>3.7.1</dep.protobuf.version>
+ <forkCount>1</forkCount>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-format</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-netty</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-core</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-context</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-protobuf</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-tcnative-boringssl-static</artifactId>
+ <version>2.0.43.Final</version>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-buffer</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-handler</artifactId>
+ <version>${dep.netty.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport</artifactId>
+ <version>${dep.netty.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.4</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-stub</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ <version>${dep.protobuf.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-api</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>javax.annotation</groupId>
+ <artifactId>javax.annotation-api</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.api.grpc</groupId>
+ <artifactId>proto-google-common-protos</artifactId>
+ <version>1.12.0</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>tests</classifier>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <extensions>
+ <extension>
+ <groupId>kr.motd.maven</groupId>
+ <artifactId>os-maven-plugin</artifactId>
+ <version>1.5.0.Final</version>
+ </extension>
+ </extensions>
+ <plugins>
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <enableAssertions>false</enableAssertions>
+ <systemPropertyVariables>
+ <arrow.test.dataRoot>${project.basedir}/../../../testing/data</arrow.test.dataRoot>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>3.1.1</version>
+ <executions>
+ <execution>
+ <id>shade-main</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <shadedArtifactAttached>true</shadedArtifactAttached>
+ <shadedClassifierName>shaded</shadedClassifierName>
+ <artifactSet>
+ <includes>
+ <include>io.grpc:*</include>
+ <include>com.google.protobuf:*</include>
+ </includes>
+ </artifactSet>
+ <relocations>
+ <relocation>
+ <pattern>com.google.protobuf</pattern>
+ <shadedPattern>arrow.flight.com.google.protobuf</shadedPattern>
+ </relocation>
+ </relocations>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+ </transformers>
+ </configuration>
+ </execution>
+ <execution>
+ <id>shade-ext</id>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <shadedArtifactAttached>true</shadedArtifactAttached>
+ <shadedClassifierName>shaded-ext</shadedClassifierName>
+ <artifactSet>
+ <includes>
+ <include>io.grpc:*</include>
+ <include>com.google.protobuf:*</include>
+ <include>com.google.guava:*</include>
+ </includes>
+ </artifactSet>
+ <relocations>
+ <relocation>
+ <pattern>com.google.protobuf</pattern>
+ <shadedPattern>arrow.flight.com.google.protobuf</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com.google.common</pattern>
+ <shadedPattern>arrow.flight.com.google.common</shadedPattern>
+ </relocation>
+ </relocations>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.xolstice.maven.plugins</groupId>
+ <artifactId>protobuf-maven-plugin</artifactId>
+ <version>0.5.0</version>
+ <configuration>
+ <protocArtifact>com.google.protobuf:protoc:${dep.protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
+ <clearOutputDirectory>false</clearOutputDirectory>
+ <pluginId>grpc-java</pluginId>
+ <pluginArtifact>io.grpc:protoc-gen-grpc-java:${dep.grpc.version}:exe:${os.detected.classifier}</pluginArtifact>
+ </configuration>
+ <executions>
+ <execution>
+ <id>src</id>
+ <configuration>
+ <protoSourceRoot>${basedir}/../../../format/</protoSourceRoot>
+ <outputDirectory>${project.build.directory}/generated-sources/protobuf</outputDirectory>
+ </configuration>
+ <goals>
+ <goal>compile</goal>
+ <goal>compile-custom</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>test</id>
+ <configuration>
+ <protoSourceRoot>${basedir}/src/test/protobuf</protoSourceRoot>
+ <outputDirectory>${project.build.directory}/generated-test-sources//protobuf</outputDirectory>
+ </configuration>
+ <goals>
+ <goal>compile</goal>
+ <goal>compile-custom</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>exec-maven-plugin</artifactId>
+ <version>1.6.0</version>
+ <configuration>
+ <executable>java</executable>
+ <classpathScope>test</classpathScope>
+ <arguments>
+ <argument>-classpath</argument>
+ <classpath />
+ <argument>-Xms64m</argument>
+ <argument>-Xmx64m</argument>
+ <argument>-XX:MaxDirectMemorySize=4g</argument>
+ <argument>org.apache.arrow.flight.example.ExampleFlightServer</argument>
+ </arguments>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>analyze</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>analyze-only</goal>
+ </goals>
+ <configuration>
+ <ignoredDependencies combine.children="append">
+ <ignoredDependency>io.netty:netty-tcnative-boringssl-static:*</ignoredDependency>
+ </ignoredDependencies>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin> <!-- add generated sources to classpath -->
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ <version>1.9.1</version>
+ <executions>
+ <execution>
+ <id>add-generated-sources-to-classpath</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>add-source</goal>
+ </goals>
+ <configuration>
+ <sources>
+ <source>${project.build.directory}/generated-sources/protobuf</source>
+ </sources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>3.0.0</version>
+ <configuration>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-assembly</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ <profiles>
+ <profile>
+ <id>linux-netty-native</id>
+ <activation>
+ <os>
+ <family>linux</family>
+ </os>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-unix-common</artifactId>
+ <version>${dep.netty.version}</version>
+ <classifier>${os.detected.name}-${os.detected.arch}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-epoll</artifactId>
+ <version>${dep.netty.version}</version>
+ <classifier>${os.detected.name}-${os.detected.arch}</classifier>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <id>mac-netty-native</id>
+ <activation>
+ <os>
+ <family>mac</family>
+ </os>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-unix-common</artifactId>
+ <version>${dep.netty.version}</version>
+ <classifier>${os.detected.name}-${os.detected.arch}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-kqueue</artifactId>
+ <version>${dep.netty.version}</version>
+ <classifier>${os.detected.name}-${os.detected.arch}</classifier>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+</project>
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Action.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Action.java
new file mode 100644
index 000000000..524ffcab9
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Action.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.flight.impl.Flight;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * An opaque action for the service to perform.
+ *
+ * <p>This is a POJO wrapper around the message of the same name in Flight.proto.
+ */
+public class Action {
+
+ private final String type;
+ private final byte[] body;
+
+ public Action(String type) {
+ this(type, null);
+ }
+
+ public Action(String type, byte[] body) {
+ this.type = type;
+ this.body = body == null ? new byte[0] : body;
+ }
+
+ Action(Flight.Action action) {
+ this(action.getType(), action.getBody().toByteArray());
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ public byte[] getBody() {
+ return body;
+ }
+
+ Flight.Action toProtocol() {
+ return Flight.Action.newBuilder()
+ .setType(getType())
+ .setBody(ByteString.copyFrom(getBody()))
+ .build();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java
new file mode 100644
index 000000000..d89365612
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.flight.impl.Flight;
+
+/**
+ * POJO wrapper around protocol specifics for Flight actions.
+ */
+public class ActionType {
+ private final String type;
+ private final String description;
+
+ /**
+ * Construct a new instance.
+ *
+ * @param type The type of action to perform
+ * @param description The description of the type.
+ */
+ public ActionType(String type, String description) {
+ super();
+ this.type = type;
+ this.description = description;
+ }
+
+ /**
+ * Constructs a new instance from the corresponding protocol buffer object.
+ */
+ ActionType(Flight.ActionType type) {
+ this.type = type.getType();
+ this.description = type.getDescription();
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ /**
+ * Converts the POJO to the corresponding protocol buffer type.
+ */
+ Flight.ActionType toProtocol() {
+ return Flight.ActionType.newBuilder()
+ .setType(type)
+ .setDescription(description)
+ .build();
+ }
+
+ @Override
+ public String toString() {
+ return "ActionType{" +
+ "type='" + type + '\'' +
+ ", description='" + description + '\'' +
+ '}';
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java
new file mode 100644
index 000000000..b4ee835de
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java
@@ -0,0 +1,560 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.flight.grpc.AddWritableBuffer;
+import org.apache.arrow.flight.grpc.GetReadableBuffer;
+import org.apache.arrow.flight.impl.Flight.FlightData;
+import org.apache.arrow.flight.impl.Flight.FlightDescriptor;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowBodyCompression;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageMetadataResult;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.io.ByteStreams;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
+import com.google.protobuf.WireFormat;
+
+import io.grpc.Drainable;
+import io.grpc.MethodDescriptor.Marshaller;
+import io.grpc.protobuf.ProtoUtils;
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufInputStream;
+import io.netty.buffer.CompositeByteBuf;
+import io.netty.buffer.Unpooled;
+import io.netty.buffer.UnpooledByteBufAllocator;
+
+/**
+ * The in-memory representation of FlightData used to manage a stream of Arrow messages.
+ */
+class ArrowMessage implements AutoCloseable {
+
+ // If true, deserialize Arrow data by giving Arrow a reference to the underlying gRPC buffer
+ // instead of copying the data. Defaults to true.
+ public static final boolean ENABLE_ZERO_COPY_READ;
+ // If true, serialize Arrow data by giving gRPC a reference to the underlying Arrow buffer
+ // instead of copying the data. Defaults to false.
+ public static final boolean ENABLE_ZERO_COPY_WRITE;
+
+ static {
+ String zeroCopyReadFlag = System.getProperty("arrow.flight.enable_zero_copy_read");
+ if (zeroCopyReadFlag == null) {
+ zeroCopyReadFlag = System.getenv("ARROW_FLIGHT_ENABLE_ZERO_COPY_READ");
+ }
+ String zeroCopyWriteFlag = System.getProperty("arrow.flight.enable_zero_copy_write");
+ if (zeroCopyWriteFlag == null) {
+ zeroCopyWriteFlag = System.getenv("ARROW_FLIGHT_ENABLE_ZERO_COPY_WRITE");
+ }
+ ENABLE_ZERO_COPY_READ = !"false".equalsIgnoreCase(zeroCopyReadFlag);
+ ENABLE_ZERO_COPY_WRITE = "true".equalsIgnoreCase(zeroCopyWriteFlag);
+ }
+
+ private static final int DESCRIPTOR_TAG =
+ (FlightData.FLIGHT_DESCRIPTOR_FIELD_NUMBER << 3) | WireFormat.WIRETYPE_LENGTH_DELIMITED;
+ private static final int BODY_TAG =
+ (FlightData.DATA_BODY_FIELD_NUMBER << 3) | WireFormat.WIRETYPE_LENGTH_DELIMITED;
+ private static final int HEADER_TAG =
+ (FlightData.DATA_HEADER_FIELD_NUMBER << 3) | WireFormat.WIRETYPE_LENGTH_DELIMITED;
+ private static final int APP_METADATA_TAG =
+ (FlightData.APP_METADATA_FIELD_NUMBER << 3) | WireFormat.WIRETYPE_LENGTH_DELIMITED;
+
+ private static final Marshaller<FlightData> NO_BODY_MARSHALLER =
+ ProtoUtils.marshaller(FlightData.getDefaultInstance());
+
+ /** Get the application-specific metadata in this message. The ArrowMessage retains ownership of the buffer. */
+ public ArrowBuf getApplicationMetadata() {
+ return appMetadata;
+ }
+
+ /** Types of messages that can be sent. */
+ public enum HeaderType {
+ NONE,
+ SCHEMA,
+ DICTIONARY_BATCH,
+ RECORD_BATCH,
+ TENSOR
+ ;
+
+ public static HeaderType getHeader(byte b) {
+ switch (b) {
+ case 0: return NONE;
+ case 1: return SCHEMA;
+ case 2: return DICTIONARY_BATCH;
+ case 3: return RECORD_BATCH;
+ case 4: return TENSOR;
+ default:
+ throw new UnsupportedOperationException("unknown type: " + b);
+ }
+ }
+
+ }
+
+ // Pre-allocated buffers for padding serialized ArrowMessages.
+ private static final List<ByteBuf> PADDING_BUFFERS = Arrays.asList(
+ null,
+ Unpooled.copiedBuffer(new byte[] { 0 }),
+ Unpooled.copiedBuffer(new byte[] { 0, 0 }),
+ Unpooled.copiedBuffer(new byte[] { 0, 0, 0 }),
+ Unpooled.copiedBuffer(new byte[] { 0, 0, 0, 0 }),
+ Unpooled.copiedBuffer(new byte[] { 0, 0, 0, 0, 0 }),
+ Unpooled.copiedBuffer(new byte[] { 0, 0, 0, 0, 0, 0 }),
+ Unpooled.copiedBuffer(new byte[] { 0, 0, 0, 0, 0, 0, 0 })
+ );
+
+ private final IpcOption writeOption;
+ private final FlightDescriptor descriptor;
+ private final MessageMetadataResult message;
+ private final ArrowBuf appMetadata;
+ private final List<ArrowBuf> bufs;
+ private final ArrowBodyCompression bodyCompression;
+ private final boolean tryZeroCopyWrite;
+
+ public ArrowMessage(FlightDescriptor descriptor, Schema schema, IpcOption option) {
+ this.writeOption = option;
+ ByteBuffer serializedMessage = MessageSerializer.serializeMetadata(schema, writeOption);
+ this.message = MessageMetadataResult.create(serializedMessage.slice(),
+ serializedMessage.remaining());
+ bufs = ImmutableList.of();
+ this.descriptor = descriptor;
+ this.appMetadata = null;
+ this.bodyCompression = NoCompressionCodec.DEFAULT_BODY_COMPRESSION;
+ this.tryZeroCopyWrite = false;
+ }
+
+ /**
+ * Create an ArrowMessage from a record batch and app metadata.
+ * @param batch The record batch.
+ * @param appMetadata The app metadata. May be null. Takes ownership of the buffer otherwise.
+ * @param tryZeroCopy Whether to enable the zero-copy optimization.
+ */
+ public ArrowMessage(ArrowRecordBatch batch, ArrowBuf appMetadata, boolean tryZeroCopy, IpcOption option) {
+ this.writeOption = option;
+ ByteBuffer serializedMessage = MessageSerializer.serializeMetadata(batch, writeOption);
+ this.message = MessageMetadataResult.create(serializedMessage.slice(), serializedMessage.remaining());
+ this.bufs = ImmutableList.copyOf(batch.getBuffers());
+ this.descriptor = null;
+ this.appMetadata = appMetadata;
+ this.bodyCompression = batch.getBodyCompression();
+ this.tryZeroCopyWrite = tryZeroCopy;
+ }
+
+ public ArrowMessage(ArrowDictionaryBatch batch, IpcOption option) {
+ this.writeOption = option;
+ ByteBuffer serializedMessage = MessageSerializer.serializeMetadata(batch, writeOption);
+ serializedMessage = serializedMessage.slice();
+ this.message = MessageMetadataResult.create(serializedMessage, serializedMessage.remaining());
+ // asInputStream will free the buffers implicitly, so increment the reference count
+ batch.getDictionary().getBuffers().forEach(buf -> buf.getReferenceManager().retain());
+ this.bufs = ImmutableList.copyOf(batch.getDictionary().getBuffers());
+ this.descriptor = null;
+ this.appMetadata = null;
+ this.bodyCompression = batch.getDictionary().getBodyCompression();
+ this.tryZeroCopyWrite = false;
+ }
+
+ /**
+ * Create an ArrowMessage containing only application metadata.
+ * @param appMetadata The application-provided metadata buffer.
+ */
+ public ArrowMessage(ArrowBuf appMetadata) {
+ // No need to take IpcOption as it's not used to serialize this kind of message.
+ this.writeOption = IpcOption.DEFAULT;
+ this.message = null;
+ this.bufs = ImmutableList.of();
+ this.descriptor = null;
+ this.appMetadata = appMetadata;
+ this.bodyCompression = NoCompressionCodec.DEFAULT_BODY_COMPRESSION;
+ this.tryZeroCopyWrite = false;
+ }
+
+ public ArrowMessage(FlightDescriptor descriptor) {
+ // No need to take IpcOption as it's not used to serialize this kind of message.
+ this.writeOption = IpcOption.DEFAULT;
+ this.message = null;
+ this.bufs = ImmutableList.of();
+ this.descriptor = descriptor;
+ this.appMetadata = null;
+ this.bodyCompression = NoCompressionCodec.DEFAULT_BODY_COMPRESSION;
+ this.tryZeroCopyWrite = false;
+ }
+
+ private ArrowMessage(FlightDescriptor descriptor, MessageMetadataResult message, ArrowBuf appMetadata,
+ ArrowBuf buf) {
+ // No need to take IpcOption as this is used for deserialized ArrowMessage coming from the wire.
+ this.writeOption = message != null ?
+ // avoid writing legacy ipc format by default
+ new IpcOption(false, MetadataVersion.fromFlatbufID(message.getMessage().version())) :
+ IpcOption.DEFAULT;
+ this.message = message;
+ this.descriptor = descriptor;
+ this.appMetadata = appMetadata;
+ this.bufs = buf == null ? ImmutableList.of() : ImmutableList.of(buf);
+ this.bodyCompression = NoCompressionCodec.DEFAULT_BODY_COMPRESSION;
+ this.tryZeroCopyWrite = false;
+ }
+
+ public MessageMetadataResult asSchemaMessage() {
+ return message;
+ }
+
+ public FlightDescriptor getDescriptor() {
+ return descriptor;
+ }
+
+ public HeaderType getMessageType() {
+ if (message == null) {
+ // Null message occurs for metadata-only messages (in DoExchange)
+ return HeaderType.NONE;
+ }
+ return HeaderType.getHeader(message.headerType());
+ }
+
+ public Schema asSchema() {
+ Preconditions.checkArgument(bufs.size() == 0);
+ Preconditions.checkArgument(getMessageType() == HeaderType.SCHEMA);
+ return MessageSerializer.deserializeSchema(message);
+ }
+
+ public ArrowRecordBatch asRecordBatch() throws IOException {
+ Preconditions.checkArgument(bufs.size() == 1, "A batch can only be consumed if it contains a single ArrowBuf.");
+ Preconditions.checkArgument(getMessageType() == HeaderType.RECORD_BATCH);
+
+ ArrowBuf underlying = bufs.get(0);
+
+ underlying.getReferenceManager().retain();
+ return MessageSerializer.deserializeRecordBatch(message, underlying);
+ }
+
+ public ArrowDictionaryBatch asDictionaryBatch() throws IOException {
+ Preconditions.checkArgument(bufs.size() == 1, "A batch can only be consumed if it contains a single ArrowBuf.");
+ Preconditions.checkArgument(getMessageType() == HeaderType.DICTIONARY_BATCH);
+ ArrowBuf underlying = bufs.get(0);
+ // Retain a reference to keep the batch alive when the message is closed
+ underlying.getReferenceManager().retain();
+ // Do not set drained - we still want to release our reference
+ return MessageSerializer.deserializeDictionaryBatch(message, underlying);
+ }
+
+ public Iterable<ArrowBuf> getBufs() {
+ return Iterables.unmodifiableIterable(bufs);
+ }
+
+ private static ArrowMessage frame(BufferAllocator allocator, final InputStream stream) {
+
+ try {
+ FlightDescriptor descriptor = null;
+ MessageMetadataResult header = null;
+ ArrowBuf body = null;
+ ArrowBuf appMetadata = null;
+ while (stream.available() > 0) {
+ int tag = readRawVarint32(stream);
+ switch (tag) {
+
+ case DESCRIPTOR_TAG: {
+ int size = readRawVarint32(stream);
+ byte[] bytes = new byte[size];
+ ByteStreams.readFully(stream, bytes);
+ descriptor = FlightDescriptor.parseFrom(bytes);
+ break;
+ }
+ case HEADER_TAG: {
+ int size = readRawVarint32(stream);
+ byte[] bytes = new byte[size];
+ ByteStreams.readFully(stream, bytes);
+ header = MessageMetadataResult.create(ByteBuffer.wrap(bytes), size);
+ break;
+ }
+ case APP_METADATA_TAG: {
+ int size = readRawVarint32(stream);
+ appMetadata = allocator.buffer(size);
+ GetReadableBuffer.readIntoBuffer(stream, appMetadata, size, ENABLE_ZERO_COPY_READ);
+ break;
+ }
+ case BODY_TAG:
+ if (body != null) {
+ // only read last body.
+ body.getReferenceManager().release();
+ body = null;
+ }
+ int size = readRawVarint32(stream);
+ body = allocator.buffer(size);
+ GetReadableBuffer.readIntoBuffer(stream, body, size, ENABLE_ZERO_COPY_READ);
+ break;
+
+ default:
+ // ignore unknown fields.
+ }
+ }
+ // Protobuf implementations can omit empty fields, such as body; for some message types, like RecordBatch,
+ // this will fail later as we still expect an empty buffer. In those cases only, fill in an empty buffer here -
+ // in other cases, like Schema, having an unexpected empty buffer will also cause failures.
+ // We don't fill in defaults for fields like header, for which there is no reasonable default, or for appMetadata
+ // or descriptor, which are intended to be empty in some cases.
+ if (header != null) {
+ switch (HeaderType.getHeader(header.headerType())) {
+ case SCHEMA:
+ // Ignore 0-length buffers in case a Protobuf implementation wrote it out
+ if (body != null && body.capacity() == 0) {
+ body.close();
+ body = null;
+ }
+ break;
+ case DICTIONARY_BATCH:
+ case RECORD_BATCH:
+ // A Protobuf implementation can skip 0-length bodies, so ensure we fill it in here
+ if (body == null) {
+ body = allocator.getEmpty();
+ }
+ break;
+ case NONE:
+ case TENSOR:
+ default:
+ // Do nothing
+ break;
+ }
+ }
+ return new ArrowMessage(descriptor, header, appMetadata, body);
+ } catch (Exception ioe) {
+ throw new RuntimeException(ioe);
+ }
+
+ }
+
+ private static int readRawVarint32(InputStream is) throws IOException {
+ int firstByte = is.read();
+ return CodedInputStream.readRawVarint32(firstByte, is);
+ }
+
+ /**
+ * Convert the ArrowMessage to an InputStream.
+ *
+ * <p>Implicitly, this transfers ownership of the contained buffers to the InputStream.
+ *
+ * @return InputStream
+ */
+ private InputStream asInputStream(BufferAllocator allocator) {
+ if (message == null) {
+ // If we have no IPC message, it's a pure-metadata message
+ final FlightData.Builder builder = FlightData.newBuilder();
+ if (descriptor != null) {
+ builder.setFlightDescriptor(descriptor);
+ }
+ if (appMetadata != null) {
+ builder.setAppMetadata(ByteString.copyFrom(appMetadata.nioBuffer()));
+ }
+ return NO_BODY_MARSHALLER.stream(builder.build());
+ }
+
+ try {
+ final ByteString bytes = ByteString.copyFrom(message.getMessageBuffer(),
+ message.bytesAfterMessage());
+
+ if (getMessageType() == HeaderType.SCHEMA) {
+
+ final FlightData.Builder builder = FlightData.newBuilder()
+ .setDataHeader(bytes);
+
+ if (descriptor != null) {
+ builder.setFlightDescriptor(descriptor);
+ }
+
+ Preconditions.checkArgument(bufs.isEmpty());
+ return NO_BODY_MARSHALLER.stream(builder.build());
+ }
+
+ Preconditions.checkArgument(getMessageType() == HeaderType.RECORD_BATCH ||
+ getMessageType() == HeaderType.DICTIONARY_BATCH);
+ // There may be no buffers in the case that we write only a null array
+ Preconditions.checkArgument(descriptor == null, "Descriptor should only be included in the schema message.");
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ CodedOutputStream cos = CodedOutputStream.newInstance(baos);
+ cos.writeBytes(FlightData.DATA_HEADER_FIELD_NUMBER, bytes);
+
+ if (appMetadata != null && appMetadata.capacity() > 0) {
+ // Must call slice() as CodedOutputStream#writeByteBuffer writes -capacity- bytes, not -limit- bytes
+ cos.writeByteBuffer(FlightData.APP_METADATA_FIELD_NUMBER, appMetadata.nioBuffer().slice());
+ }
+
+ cos.writeTag(FlightData.DATA_BODY_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED);
+ int size = 0;
+ List<ByteBuf> allBufs = new ArrayList<>();
+ for (ArrowBuf b : bufs) {
+ // [ARROW-11066] This creates a Netty buffer whose refcnt is INDEPENDENT of the backing
+ // Arrow buffer. This is susceptible to use-after-free, so we subclass CompositeByteBuf
+ // below to tie the Arrow buffer refcnt to the Netty buffer refcnt
+ allBufs.add(Unpooled.wrappedBuffer(b.nioBuffer()).retain());
+ size += b.readableBytes();
+ // [ARROW-4213] These buffers must be aligned to an 8-byte boundary in order to be readable from C++.
+ if (b.readableBytes() % 8 != 0) {
+ int paddingBytes = (int) (8 - (b.readableBytes() % 8));
+ assert paddingBytes > 0 && paddingBytes < 8;
+ size += paddingBytes;
+ allBufs.add(PADDING_BUFFERS.get(paddingBytes).retain());
+ }
+ }
+ // rawvarint is used for length definition.
+ cos.writeUInt32NoTag(size);
+ cos.flush();
+
+ ByteBuf initialBuf = Unpooled.buffer(baos.size());
+ initialBuf.writeBytes(baos.toByteArray());
+ final CompositeByteBuf bb;
+ final int maxNumComponents = Math.max(2, bufs.size() + 1);
+ final ImmutableList<ByteBuf> byteBufs = ImmutableList.<ByteBuf>builder()
+ .add(initialBuf)
+ .addAll(allBufs)
+ .build();
+ if (tryZeroCopyWrite) {
+ bb = new ArrowBufRetainingCompositeByteBuf(maxNumComponents, byteBufs, bufs);
+ } else {
+ // Don't retain the buffers in the non-zero-copy path since we're copying them
+ bb = new CompositeByteBuf(UnpooledByteBufAllocator.DEFAULT, /* direct */ true, maxNumComponents, byteBufs);
+ }
+ return new DrainableByteBufInputStream(bb, tryZeroCopyWrite);
+ } catch (Exception ex) {
+ throw new RuntimeException("Unexpected IO Exception", ex);
+ }
+
+ }
+
+ /**
+ * ARROW-11066: enable the zero-copy optimization and protect against use-after-free.
+ *
+ * When you send a message through gRPC, the following happens:
+ * 1. gRPC immediately serializes the message, eventually calling asInputStream above.
+ * 2. gRPC buffers the serialized message for sending.
+ * 3. Later, gRPC will actually write out the message.
+ *
+ * The problem with this is that when the zero-copy optimization is enabled, Flight
+ * "serializes" the message by handing gRPC references to Arrow data. That means we need
+ * a way to keep the Arrow buffers valid until gRPC actually writes them, else, we'll read
+ * invalid data or segfault. gRPC doesn't know anything about Arrow buffers, either.
+ *
+ * This class solves that issue by bridging Arrow and Netty/gRPC. We increment the refcnt
+ * on a set of Arrow backing buffers and decrement them once the Netty buffers are freed
+ * by gRPC.
+ */
+ private static final class ArrowBufRetainingCompositeByteBuf extends CompositeByteBuf {
+ // Arrow buffers that back the Netty ByteBufs here; ByteBufs held by this class are
+ // either slices of one of the ArrowBufs or independently allocated.
+ final List<ArrowBuf> backingBuffers;
+ boolean freed;
+
+ ArrowBufRetainingCompositeByteBuf(int maxNumComponents, Iterable<ByteBuf> buffers, List<ArrowBuf> backingBuffers) {
+ super(UnpooledByteBufAllocator.DEFAULT, /* direct */ true, maxNumComponents, buffers);
+ this.backingBuffers = backingBuffers;
+ this.freed = false;
+ // N.B. the Netty superclass avoids enhanced-for to reduce GC pressure, so follow that here
+ for (int i = 0; i < backingBuffers.size(); i++) {
+ backingBuffers.get(i).getReferenceManager().retain();
+ }
+ }
+
+ @Override
+ protected void deallocate() {
+ super.deallocate();
+ if (freed) {
+ return;
+ }
+ freed = true;
+ for (int i = 0; i < backingBuffers.size(); i++) {
+ backingBuffers.get(i).getReferenceManager().release();
+ }
+ }
+ }
+
+ private static class DrainableByteBufInputStream extends ByteBufInputStream implements Drainable {
+
+ private final CompositeByteBuf buf;
+ private final boolean isZeroCopy;
+
+ public DrainableByteBufInputStream(CompositeByteBuf buffer, boolean isZeroCopy) {
+ super(buffer, buffer.readableBytes(), true);
+ this.buf = buffer;
+ this.isZeroCopy = isZeroCopy;
+ }
+
+ @Override
+ public int drainTo(OutputStream target) throws IOException {
+ int size = buf.readableBytes();
+ AddWritableBuffer.add(buf, target, isZeroCopy);
+ return size;
+ }
+
+ @Override
+ public void close() {
+ buf.release();
+ }
+
+
+
+ }
+
+ public static Marshaller<ArrowMessage> createMarshaller(BufferAllocator allocator) {
+ return new ArrowMessageHolderMarshaller(allocator);
+ }
+
+ private static class ArrowMessageHolderMarshaller implements Marshaller<ArrowMessage> {
+
+ private final BufferAllocator allocator;
+
+ public ArrowMessageHolderMarshaller(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ @Override
+ public InputStream stream(ArrowMessage value) {
+ return value.asInputStream(allocator);
+ }
+
+ @Override
+ public ArrowMessage parse(InputStream stream) {
+ return ArrowMessage.frame(allocator, stream);
+ }
+
+ }
+
+ @Override
+ public void close() throws Exception {
+ AutoCloseables.close(Iterables.concat(bufs, Collections.singletonList(appMetadata)));
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/AsyncPutListener.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/AsyncPutListener.java
new file mode 100644
index 000000000..a45463225
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/AsyncPutListener.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+
+import org.apache.arrow.flight.grpc.StatusUtils;
+
+/**
+ * A handler for server-sent application metadata messages during a Flight DoPut operation.
+ *
+ * <p>To handle messages, create an instance of this class overriding {@link #onNext(PutResult)}. The other methods
+ * should not be overridden.
+ */
+public class AsyncPutListener implements FlightClient.PutListener {
+
+ private CompletableFuture<Void> completed;
+
+ public AsyncPutListener() {
+ completed = new CompletableFuture<>();
+ }
+
+ /**
+ * Wait for the stream to finish on the server side. You must call this to be notified of any errors that may have
+ * happened during the upload.
+ */
+ @Override
+ public final void getResult() {
+ try {
+ completed.get();
+ } catch (ExecutionException e) {
+ throw StatusUtils.fromThrowable(e.getCause());
+ } catch (InterruptedException e) {
+ throw StatusUtils.fromThrowable(e);
+ }
+ }
+
+ @Override
+ public void onNext(PutResult val) {
+ }
+
+ @Override
+ public final void onError(Throwable t) {
+ completed.completeExceptionally(StatusUtils.fromThrowable(t));
+ }
+
+ @Override
+ public final void onCompleted() {
+ completed.complete(null);
+ }
+
+ @Override
+ public boolean isCancelled() {
+ return completed.isDone();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/BackpressureStrategy.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/BackpressureStrategy.java
new file mode 100644
index 000000000..de34643a7
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/BackpressureStrategy.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Helper interface to dynamically handle backpressure when implementing FlightProducers.
+ * This must only be used in FlightProducer implementations that are non-blocking.
+ */
+public interface BackpressureStrategy {
+ /**
+ * The state of the client after a call to waitForListener.
+ */
+ enum WaitResult {
+ /**
+ * Listener is ready.
+ */
+ READY,
+
+ /**
+ * Listener was cancelled by the client.
+ */
+ CANCELLED,
+
+ /**
+ * Timed out waiting for the listener to change state.
+ */
+ TIMEOUT,
+
+ /**
+ * Indicates that the wait was interrupted for a reason
+ * unrelated to the listener itself.
+ */
+ OTHER
+ }
+
+ /**
+ * Set up operations to work against the given listener.
+ *
+ * This must be called exactly once and before any calls to {@link #waitForListener(long)} and
+ * {@link OutboundStreamListener#start(VectorSchemaRoot)}
+ * @param listener The listener this strategy applies to.
+ */
+ void register(FlightProducer.ServerStreamListener listener);
+
+ /**
+ * Waits for the listener to be ready or cancelled up to the given timeout.
+ *
+ * @param timeout The timeout in milliseconds. Infinite if timeout is <= 0.
+ * @return The result of the wait.
+ */
+ WaitResult waitForListener(long timeout);
+
+ /**
+ * A back pressure strategy that uses callbacks to notify when the client is ready or cancelled.
+ */
+ class CallbackBackpressureStrategy implements BackpressureStrategy {
+ private final Object lock = new Object();
+ private FlightProducer.ServerStreamListener listener;
+
+ @Override
+ public void register(FlightProducer.ServerStreamListener listener) {
+ this.listener = listener;
+ listener.setOnReadyHandler(this::onReady);
+ listener.setOnCancelHandler(this::onCancel);
+ }
+
+ @Override
+ public WaitResult waitForListener(long timeout) {
+ Preconditions.checkNotNull(listener);
+ long remainingTimeout = timeout;
+ final long startTime = System.currentTimeMillis();
+ synchronized (lock) {
+ while (!listener.isReady() && !listener.isCancelled()) {
+ try {
+ lock.wait(remainingTimeout);
+ if (timeout != 0) { // If timeout was zero explicitly, we should never report timeout.
+ remainingTimeout = startTime + timeout - System.currentTimeMillis();
+ if (remainingTimeout <= 0) {
+ return WaitResult.TIMEOUT;
+ }
+ }
+ if (!shouldContinueWaiting(listener, remainingTimeout)) {
+ return WaitResult.OTHER;
+ }
+ } catch (InterruptedException ex) {
+ Thread.currentThread().interrupt();
+ return WaitResult.OTHER;
+ }
+ }
+
+ if (listener.isReady()) {
+ return WaitResult.READY;
+ } else if (listener.isCancelled()) {
+ return WaitResult.CANCELLED;
+ } else if (System.currentTimeMillis() > startTime + timeout) {
+ return WaitResult.TIMEOUT;
+ }
+ throw new RuntimeException("Invalid state when waiting for listener.");
+ }
+ }
+
+ /**
+ * Interrupt waiting on the listener to change state.
+ *
+ * This method can be used in conjunction with
+ * {@link #shouldContinueWaiting(FlightProducer.ServerStreamListener, long)} to allow FlightProducers to
+ * terminate streams internally and notify clients.
+ */
+ public void interruptWait() {
+ synchronized (lock) {
+ lock.notifyAll();
+ }
+ }
+
+ /**
+ * Callback function to run to check if the listener should continue
+ * to be waited on if it leaves the waiting state without being cancelled,
+ * ready, or timed out.
+ *
+ * This method should be used to determine if the wait on the listener was interrupted explicitly using a
+ * call to {@link #interruptWait()} or if it was woken up due to a spurious wake.
+ */
+ protected boolean shouldContinueWaiting(FlightProducer.ServerStreamListener listener, long remainingTimeout) {
+ return true;
+ }
+
+ /**
+ * Callback to execute when the listener becomes ready.
+ */
+ protected void readyCallback() {
+ }
+
+ /**
+ * Callback to execute when the listener is cancelled.
+ */
+ protected void cancelCallback() {
+ }
+
+ private void onReady() {
+ synchronized (lock) {
+ readyCallback();
+ lock.notifyAll();
+ }
+ }
+
+ private void onCancel() {
+ synchronized (lock) {
+ cancelCallback();
+ lock.notifyAll();
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallHeaders.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallHeaders.java
new file mode 100644
index 000000000..32f9a8430
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallHeaders.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Set;
+
+/**
+ * A set of metadata key value pairs for a call (request or response).
+ */
+public interface CallHeaders {
+ /**
+ * Get the value of a metadata key. If multiple values are present, then get the last one.
+ */
+ String get(String key);
+
+ /**
+ * Get the value of a metadata key. If multiple values are present, then get the last one.
+ */
+ byte[] getByte(String key);
+
+ /**
+ * Get all values present for the given metadata key.
+ */
+ Iterable<String> getAll(String key);
+
+ /**
+ * Get all values present for the given metadata key.
+ */
+ Iterable<byte[]> getAllByte(String key);
+
+ /**
+ * Insert a metadata pair with the given value.
+ *
+ * <p>Duplicate metadata are permitted.
+ */
+ void insert(String key, String value);
+
+ /**
+ * Insert a metadata pair with the given value.
+ *
+ * <p>Duplicate metadata are permitted.
+ */
+ void insert(String key, byte[] value);
+
+ /** Get a set of all the metadata keys. */
+ Set<String> keys();
+
+ /** Check whether the given metadata key is present. */
+ boolean containsKey(String key);
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallInfo.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallInfo.java
new file mode 100644
index 000000000..744584bdf
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallInfo.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+/**
+ * A description of a Flight call for middleware to inspect.
+ */
+public final class CallInfo {
+ private final FlightMethod method;
+
+ public CallInfo(FlightMethod method) {
+ this.method = method;
+ }
+
+ public FlightMethod method() {
+ return method;
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOption.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOption.java
new file mode 100644
index 000000000..d3ee3ab4c
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOption.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+/**
+ * Per-call RPC options. These are hints to the underlying RPC layer and may not be respected.
+ */
+public interface CallOption {
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOptions.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOptions.java
new file mode 100644
index 000000000..bbb4edef9
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOptions.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.concurrent.TimeUnit;
+
+import io.grpc.stub.AbstractStub;
+
+/**
+ * Common call options.
+ */
+public class CallOptions {
+ public static CallOption timeout(long duration, TimeUnit unit) {
+ return new Timeout(duration, unit);
+ }
+
+ static <T extends AbstractStub<T>> T wrapStub(T stub, CallOption[] options) {
+ for (CallOption option : options) {
+ if (option instanceof GrpcCallOption) {
+ stub = ((GrpcCallOption) option).wrapStub(stub);
+ }
+ }
+ return stub;
+ }
+
+ private static class Timeout implements GrpcCallOption {
+ long timeout;
+ TimeUnit timeoutUnit;
+
+ Timeout(long timeout, TimeUnit timeoutUnit) {
+ this.timeout = timeout;
+ this.timeoutUnit = timeoutUnit;
+ }
+
+ @Override
+ public <T extends AbstractStub<T>> T wrapStub(T stub) {
+ return stub.withDeadlineAfter(timeout, timeoutUnit);
+ }
+ }
+
+ /**
+ * CallOptions specific to GRPC stubs.
+ */
+ public interface GrpcCallOption extends CallOption {
+ <T extends AbstractStub<T>> T wrapStub(T stub);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java
new file mode 100644
index 000000000..991d0ed6a
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Objects;
+
+import org.apache.arrow.flight.FlightProducer.ServerStreamListener;
+import org.apache.arrow.flight.FlightProducer.StreamListener;
+
+/**
+ * The result of a Flight RPC, consisting of a status code with an optional description and/or exception that led
+ * to the status.
+ *
+ * <p>If raised or sent through {@link StreamListener#onError(Throwable)} or
+ * {@link ServerStreamListener#error(Throwable)}, the client call will raise the same error (a
+ * {@link FlightRuntimeException} with the same {@link FlightStatusCode} and description). The exception within, if
+ * present, will not be sent to the client.
+ */
+public class CallStatus {
+
+ private final FlightStatusCode code;
+ private final Throwable cause;
+ private final String description;
+ private final ErrorFlightMetadata metadata;
+
+ public static final CallStatus UNKNOWN = FlightStatusCode.UNKNOWN.toStatus();
+ public static final CallStatus INTERNAL = FlightStatusCode.INTERNAL.toStatus();
+ public static final CallStatus INVALID_ARGUMENT = FlightStatusCode.INVALID_ARGUMENT.toStatus();
+ public static final CallStatus TIMED_OUT = FlightStatusCode.TIMED_OUT.toStatus();
+ public static final CallStatus NOT_FOUND = FlightStatusCode.NOT_FOUND.toStatus();
+ public static final CallStatus ALREADY_EXISTS = FlightStatusCode.ALREADY_EXISTS.toStatus();
+ public static final CallStatus CANCELLED = FlightStatusCode.CANCELLED.toStatus();
+ public static final CallStatus UNAUTHENTICATED = FlightStatusCode.UNAUTHENTICATED.toStatus();
+ public static final CallStatus UNAUTHORIZED = FlightStatusCode.UNAUTHORIZED.toStatus();
+ public static final CallStatus UNIMPLEMENTED = FlightStatusCode.UNIMPLEMENTED.toStatus();
+ public static final CallStatus UNAVAILABLE = FlightStatusCode.UNAVAILABLE.toStatus();
+
+ /**
+ * Create a new status.
+ *
+ * @param code The status code.
+ * @param cause An exception that resulted in this status (or null).
+ * @param description A description of the status (or null).
+ */
+ public CallStatus(FlightStatusCode code, Throwable cause, String description, ErrorFlightMetadata metadata) {
+ this.code = Objects.requireNonNull(code);
+ this.cause = cause;
+ this.description = description == null ? "" : description;
+ this.metadata = metadata == null ? new ErrorFlightMetadata() : metadata;
+ }
+
+ /**
+ * Create a new status with no cause or description.
+ *
+ * @param code The status code.
+ */
+ public CallStatus(FlightStatusCode code) {
+ this(code, /* no cause */ null, /* no description */ null, /* no metadata */ null);
+ }
+
+ /**
+ * The status code describing the result of the RPC.
+ */
+ public FlightStatusCode code() {
+ return code;
+ }
+
+ /**
+ * The exception that led to this result. May be null.
+ */
+ public Throwable cause() {
+ return cause;
+ }
+
+ /**
+ * A description of the result.
+ */
+ public String description() {
+ return description;
+ }
+
+ /**
+ * Metadata associated with the exception.
+ *
+ * May be null.
+ */
+ public ErrorFlightMetadata metadata() {
+ return metadata;
+ }
+
+ /**
+ * Return a copy of this status with an error message.
+ */
+ public CallStatus withDescription(String message) {
+ return new CallStatus(code, cause, message, metadata);
+ }
+
+ /**
+ * Return a copy of this status with the given exception as the cause. This will not be sent over the wire.
+ */
+ public CallStatus withCause(Throwable t) {
+ return new CallStatus(code, t, description, metadata);
+ }
+
+ /**
+ * Return a copy of this status with associated exception metadata.
+ */
+ public CallStatus withMetadata(ErrorFlightMetadata metadata) {
+ return new CallStatus(code, cause, description, metadata);
+ }
+
+ /**
+ * Convert the status to an equivalent exception.
+ */
+ public FlightRuntimeException toRuntimeException() {
+ return new FlightRuntimeException(this);
+ }
+
+ @Override
+ public String toString() {
+ return "CallStatus{" +
+ "code=" + code +
+ ", cause=" + cause +
+ ", description='" + description +
+ "', metadata='" + metadata + '\'' +
+ '}';
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Criteria.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Criteria.java
new file mode 100644
index 000000000..989cd6581
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Criteria.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.flight.impl.Flight;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * An opaque object that can be used to filter a list of streams available from a server.
+ *
+ * <p>This is a POJO wrapper around the protobuf Criteria message.
+ */
+public class Criteria {
+
+ public static Criteria ALL = new Criteria((byte[]) null);
+
+ private final byte[] bytes;
+
+ public Criteria(byte[] bytes) {
+ this.bytes = bytes;
+ }
+
+ Criteria(Flight.Criteria criteria) {
+ this.bytes = criteria.getExpression().toByteArray();
+ }
+
+ /**
+ * Get the contained filter criteria.
+ */
+ public byte[] getExpression() {
+ return bytes;
+ }
+
+ Flight.Criteria asCriteria() {
+ Flight.Criteria.Builder b = Flight.Criteria.newBuilder();
+ if (bytes != null) {
+ b.setExpression(ByteString.copyFrom(bytes));
+ }
+
+ return b.build();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/DictionaryUtils.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/DictionaryUtils.java
new file mode 100644
index 000000000..516dab01d
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/DictionaryUtils.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+
+/**
+ * Utilities to work with dictionaries in Flight.
+ */
+final class DictionaryUtils {
+
+ private DictionaryUtils() {
+ throw new UnsupportedOperationException("Do not instantiate this class.");
+ }
+
+ /**
+ * Generate all the necessary Flight messages to send a schema and associated dictionaries.
+ *
+ * @throws Exception if there was an error closing {@link ArrowMessage} objects. This is not generally expected.
+ */
+ static Schema generateSchemaMessages(final Schema originalSchema, final FlightDescriptor descriptor,
+ final DictionaryProvider provider, final IpcOption option,
+ final Consumer<ArrowMessage> messageCallback) throws Exception {
+ final Set<Long> dictionaryIds = new HashSet<>();
+ final Schema schema = generateSchema(originalSchema, provider, dictionaryIds);
+ MetadataV4UnionChecker.checkForUnion(schema.getFields().iterator(), option.metadataVersion);
+ // Send the schema message
+ final Flight.FlightDescriptor protoDescriptor = descriptor == null ? null : descriptor.toProtocol();
+ try (final ArrowMessage message = new ArrowMessage(protoDescriptor, schema, option)) {
+ messageCallback.accept(message);
+ }
+ // Create and write dictionary batches
+ for (Long id : dictionaryIds) {
+ final Dictionary dictionary = provider.lookup(id);
+ final FieldVector vector = dictionary.getVector();
+ final int count = vector.getValueCount();
+ // Do NOT close this root, as it does not actually own the vector.
+ final VectorSchemaRoot dictRoot = new VectorSchemaRoot(
+ Collections.singletonList(vector.getField()),
+ Collections.singletonList(vector),
+ count);
+ final VectorUnloader unloader = new VectorUnloader(dictRoot);
+ try (final ArrowDictionaryBatch dictionaryBatch = new ArrowDictionaryBatch(
+ id, unloader.getRecordBatch());
+ final ArrowMessage message = new ArrowMessage(dictionaryBatch, option)) {
+ messageCallback.accept(message);
+ }
+ }
+ return schema;
+ }
+
+ static void closeDictionaries(final Schema schema, final DictionaryProvider provider) throws Exception {
+ // Close dictionaries
+ final Set<Long> dictionaryIds = new HashSet<>();
+ schema.getFields().forEach(field -> DictionaryUtility.toMessageFormat(field, provider, dictionaryIds));
+
+ final List<AutoCloseable> dictionaryVectors = dictionaryIds.stream()
+ .map(id -> (AutoCloseable) provider.lookup(id).getVector()).collect(Collectors.toList());
+ AutoCloseables.close(dictionaryVectors);
+ }
+
+ /**
+ * Generates the schema to send with flight messages.
+ * If the schema contains no field with a dictionary, it will return the schema as is.
+ * Otherwise, it will return a newly created a new schema after converting the fields.
+ * @param originalSchema the original schema.
+ * @param provider the dictionary provider.
+ * @param dictionaryIds dictionary IDs that are used.
+ * @return the schema to send with the flight messages.
+ */
+ static Schema generateSchema(
+ final Schema originalSchema, final DictionaryProvider provider, Set<Long> dictionaryIds) {
+ // first determine if a new schema needs to be created.
+ boolean createSchema = false;
+ for (Field field : originalSchema.getFields()) {
+ if (DictionaryUtility.needConvertToMessageFormat(field)) {
+ createSchema = true;
+ break;
+ }
+ }
+
+ if (!createSchema) {
+ return originalSchema;
+ } else {
+ final List<Field> fields = new ArrayList<>(originalSchema.getFields().size());
+ for (final Field field : originalSchema.getFields()) {
+ fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIds));
+ }
+ return new Schema(fields, originalSchema.getCustomMetadata());
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ErrorFlightMetadata.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ErrorFlightMetadata.java
new file mode 100644
index 000000000..6669ce465
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ErrorFlightMetadata.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.LinkedListMultimap;
+import com.google.common.collect.Multimap;
+
+/**
+ * metadata container specific to the binary metadata held in the grpc trailer.
+ */
+public class ErrorFlightMetadata implements CallHeaders {
+ private final Multimap<String, byte[]> metadata = LinkedListMultimap.create();
+
+ public ErrorFlightMetadata() {
+ }
+
+
+ @Override
+ public String get(String key) {
+ return new String(getByte(key), StandardCharsets.US_ASCII);
+ }
+
+ @Override
+ public byte[] getByte(String key) {
+ return Iterables.getLast(metadata.get(key));
+ }
+
+ @Override
+ public Iterable<String> getAll(String key) {
+ return StreamSupport.stream(
+ getAllByte(key).spliterator(), false)
+ .map(b -> new String(b, StandardCharsets.US_ASCII))
+ .collect(Collectors.toList());
+ }
+
+ @Override
+ public Iterable<byte[]> getAllByte(String key) {
+ return metadata.get(key);
+ }
+
+ @Override
+ public void insert(String key, String value) {
+ metadata.put(key, value.getBytes());
+ }
+
+ @Override
+ public void insert(String key, byte[] value) {
+ metadata.put(key, value);
+ }
+
+ @Override
+ public Set<String> keys() {
+ return metadata.keySet();
+ }
+
+ @Override
+ public boolean containsKey(String key) {
+ return metadata.containsKey(key);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightBindingService.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightBindingService.java
new file mode 100644
index 000000000..ba5249b4a
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightBindingService.java
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+
+import org.apache.arrow.flight.auth.ServerAuthHandler;
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.flight.impl.Flight.PutResult;
+import org.apache.arrow.flight.impl.FlightServiceGrpc;
+import org.apache.arrow.memory.BufferAllocator;
+
+import com.google.common.collect.ImmutableSet;
+
+import io.grpc.BindableService;
+import io.grpc.MethodDescriptor;
+import io.grpc.MethodDescriptor.MethodType;
+import io.grpc.ServerMethodDefinition;
+import io.grpc.ServerServiceDefinition;
+import io.grpc.ServiceDescriptor;
+import io.grpc.protobuf.ProtoUtils;
+import io.grpc.stub.ServerCalls;
+import io.grpc.stub.StreamObserver;
+
+/**
+ * Extends the basic flight service to override some methods for more efficient implementations.
+ */
+class FlightBindingService implements BindableService {
+
+ private static final String DO_GET = MethodDescriptor.generateFullMethodName(FlightConstants.SERVICE, "DoGet");
+ private static final String DO_PUT = MethodDescriptor.generateFullMethodName(FlightConstants.SERVICE, "DoPut");
+ private static final String DO_EXCHANGE = MethodDescriptor.generateFullMethodName(
+ FlightConstants.SERVICE, "DoExchange");
+ private static final Set<String> OVERRIDE_METHODS = ImmutableSet.of(DO_GET, DO_PUT, DO_EXCHANGE);
+
+ private final FlightService delegate;
+ private final BufferAllocator allocator;
+
+ public FlightBindingService(BufferAllocator allocator, FlightProducer producer,
+ ServerAuthHandler authHandler, ExecutorService executor) {
+ this.allocator = allocator;
+ this.delegate = new FlightService(allocator, producer, authHandler, executor);
+ }
+
+ public static MethodDescriptor<Flight.Ticket, ArrowMessage> getDoGetDescriptor(BufferAllocator allocator) {
+ return MethodDescriptor.<Flight.Ticket, ArrowMessage>newBuilder()
+ .setType(io.grpc.MethodDescriptor.MethodType.SERVER_STREAMING)
+ .setFullMethodName(DO_GET)
+ .setSampledToLocalTracing(false)
+ .setRequestMarshaller(ProtoUtils.marshaller(Flight.Ticket.getDefaultInstance()))
+ .setResponseMarshaller(ArrowMessage.createMarshaller(allocator))
+ .setSchemaDescriptor(FlightServiceGrpc.getDoGetMethod().getSchemaDescriptor())
+ .build();
+ }
+
+ public static MethodDescriptor<ArrowMessage, Flight.PutResult> getDoPutDescriptor(BufferAllocator allocator) {
+ return MethodDescriptor.<ArrowMessage, Flight.PutResult>newBuilder()
+ .setType(MethodType.BIDI_STREAMING)
+ .setFullMethodName(DO_PUT)
+ .setSampledToLocalTracing(false)
+ .setRequestMarshaller(ArrowMessage.createMarshaller(allocator))
+ .setResponseMarshaller(ProtoUtils.marshaller(Flight.PutResult.getDefaultInstance()))
+ .setSchemaDescriptor(FlightServiceGrpc.getDoPutMethod().getSchemaDescriptor())
+ .build();
+ }
+
+ public static MethodDescriptor<ArrowMessage, ArrowMessage> getDoExchangeDescriptor(BufferAllocator allocator) {
+ return MethodDescriptor.<ArrowMessage, ArrowMessage>newBuilder()
+ .setType(MethodType.BIDI_STREAMING)
+ .setFullMethodName(DO_EXCHANGE)
+ .setSampledToLocalTracing(false)
+ .setRequestMarshaller(ArrowMessage.createMarshaller(allocator))
+ .setResponseMarshaller(ArrowMessage.createMarshaller(allocator))
+ .setSchemaDescriptor(FlightServiceGrpc.getDoExchangeMethod().getSchemaDescriptor())
+ .build();
+ }
+
+ @Override
+ public ServerServiceDefinition bindService() {
+ final ServerServiceDefinition baseDefinition = delegate.bindService();
+
+ final MethodDescriptor<Flight.Ticket, ArrowMessage> doGetDescriptor = getDoGetDescriptor(allocator);
+ final MethodDescriptor<ArrowMessage, Flight.PutResult> doPutDescriptor = getDoPutDescriptor(allocator);
+ final MethodDescriptor<ArrowMessage, ArrowMessage> doExchangeDescriptor = getDoExchangeDescriptor(allocator);
+
+ // Make sure we preserve SchemaDescriptor fields on methods so that gRPC reflection still works.
+ final ServiceDescriptor.Builder serviceDescriptorBuilder = ServiceDescriptor.newBuilder(FlightConstants.SERVICE)
+ .setSchemaDescriptor(baseDefinition.getServiceDescriptor().getSchemaDescriptor());
+ serviceDescriptorBuilder.addMethod(doGetDescriptor);
+ serviceDescriptorBuilder.addMethod(doPutDescriptor);
+ serviceDescriptorBuilder.addMethod(doExchangeDescriptor);
+ for (MethodDescriptor<?, ?> definition : baseDefinition.getServiceDescriptor().getMethods()) {
+ if (OVERRIDE_METHODS.contains(definition.getFullMethodName())) {
+ continue;
+ }
+
+ serviceDescriptorBuilder.addMethod(definition);
+ }
+
+ final ServiceDescriptor serviceDescriptor = serviceDescriptorBuilder.build();
+ ServerServiceDefinition.Builder serviceBuilder = ServerServiceDefinition.builder(serviceDescriptor);
+ serviceBuilder.addMethod(doGetDescriptor, ServerCalls.asyncServerStreamingCall(new DoGetMethod(delegate)));
+ serviceBuilder.addMethod(doPutDescriptor, ServerCalls.asyncBidiStreamingCall(new DoPutMethod(delegate)));
+ serviceBuilder.addMethod(doExchangeDescriptor, ServerCalls.asyncBidiStreamingCall(new DoExchangeMethod(delegate)));
+
+ // copy over not-overridden methods.
+ for (ServerMethodDefinition<?, ?> definition : baseDefinition.getMethods()) {
+ if (OVERRIDE_METHODS.contains(definition.getMethodDescriptor().getFullMethodName())) {
+ continue;
+ }
+
+ serviceBuilder.addMethod(definition);
+ }
+
+ return serviceBuilder.build();
+ }
+
+ private static class DoGetMethod implements ServerCalls.ServerStreamingMethod<Flight.Ticket, ArrowMessage> {
+
+ private final FlightService delegate;
+
+ public DoGetMethod(FlightService delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ public void invoke(Flight.Ticket request, StreamObserver<ArrowMessage> responseObserver) {
+ delegate.doGetCustom(request, responseObserver);
+ }
+ }
+
+ private static class DoPutMethod implements ServerCalls.BidiStreamingMethod<ArrowMessage, PutResult> {
+ private final FlightService delegate;
+
+ public DoPutMethod(FlightService delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ public StreamObserver<ArrowMessage> invoke(StreamObserver<PutResult> responseObserver) {
+ return delegate.doPutCustom(responseObserver);
+ }
+ }
+
+ private static class DoExchangeMethod implements ServerCalls.BidiStreamingMethod<ArrowMessage, ArrowMessage> {
+ private final FlightService delegate;
+
+ public DoExchangeMethod(FlightService delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ public StreamObserver<ArrowMessage> invoke(StreamObserver<ArrowMessage> responseObserver) {
+ return delegate.doExchangeCustom(responseObserver);
+ }
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightCallHeaders.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightCallHeaders.java
new file mode 100644
index 000000000..dd26d1908
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightCallHeaders.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Collection;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Multimap;
+
+import io.grpc.Metadata;
+
+/**
+ * An implementation of the Flight headers interface for headers.
+ */
+public class FlightCallHeaders implements CallHeaders {
+ private final Multimap<String, Object> keysAndValues;
+
+ public FlightCallHeaders() {
+ this.keysAndValues = ArrayListMultimap.create();
+ }
+
+ @Override
+ public String get(String key) {
+ final Collection<Object> values = this.keysAndValues.get(key);
+ if (values.isEmpty()) {
+ return null;
+ }
+
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ return new String((byte[]) Iterables.get(values, 0));
+ }
+
+ return (String) Iterables.get(values, 0);
+ }
+
+ @Override
+ public byte[] getByte(String key) {
+ final Collection<Object> values = this.keysAndValues.get(key);
+ if (values.isEmpty()) {
+ return null;
+ }
+
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ return (byte[]) Iterables.get(values, 0);
+ }
+
+ return ((String) Iterables.get(values, 0)).getBytes();
+ }
+
+ @Override
+ public Iterable<String> getAll(String key) {
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ return this.keysAndValues.get(key).stream().map(o -> new String((byte[]) o)).collect(Collectors.toList());
+ }
+ return (Collection<String>) (Collection<?>) this.keysAndValues.get(key);
+ }
+
+ @Override
+ public Iterable<byte[]> getAllByte(String key) {
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ return (Collection<byte[]>) (Collection<?>) this.keysAndValues.get(key);
+ }
+ return this.keysAndValues.get(key).stream().map(o -> ((String) o).getBytes()).collect(Collectors.toList());
+ }
+
+ @Override
+ public void insert(String key, String value) {
+ this.keysAndValues.put(key, value);
+ }
+
+ @Override
+ public void insert(String key, byte[] value) {
+ Preconditions.checkArgument(key.endsWith("-bin"), "Binary header is named %s. It must end with %s", key, "-bin");
+ Preconditions.checkArgument(key.length() > "-bin".length(), "empty key name");
+
+ this.keysAndValues.put(key, value);
+ }
+
+ @Override
+ public Set<String> keys() {
+ return this.keysAndValues.keySet();
+ }
+
+ @Override
+ public boolean containsKey(String key) {
+ return this.keysAndValues.containsKey(key);
+ }
+
+ public String toString() {
+ return this.keysAndValues.toString();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java
new file mode 100644
index 000000000..762b37859
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java
@@ -0,0 +1,721 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.function.BooleanSupplier;
+
+import javax.net.ssl.SSLException;
+
+import org.apache.arrow.flight.FlightProducer.StreamListener;
+import org.apache.arrow.flight.auth.BasicClientAuthHandler;
+import org.apache.arrow.flight.auth.ClientAuthHandler;
+import org.apache.arrow.flight.auth.ClientAuthInterceptor;
+import org.apache.arrow.flight.auth.ClientAuthWrapper;
+import org.apache.arrow.flight.auth2.BasicAuthCredentialWriter;
+import org.apache.arrow.flight.auth2.ClientBearerHeaderHandler;
+import org.apache.arrow.flight.auth2.ClientHandshakeWrapper;
+import org.apache.arrow.flight.auth2.ClientIncomingAuthHeaderMiddleware;
+import org.apache.arrow.flight.grpc.ClientInterceptorAdapter;
+import org.apache.arrow.flight.grpc.CredentialCallOption;
+import org.apache.arrow.flight.grpc.StatusUtils;
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.flight.impl.Flight.Empty;
+import org.apache.arrow.flight.impl.FlightServiceGrpc;
+import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceBlockingStub;
+import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceStub;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider;
+
+import io.grpc.Channel;
+import io.grpc.ClientCall;
+import io.grpc.ClientInterceptor;
+import io.grpc.ClientInterceptors;
+import io.grpc.ManagedChannel;
+import io.grpc.MethodDescriptor;
+import io.grpc.StatusRuntimeException;
+import io.grpc.netty.GrpcSslContexts;
+import io.grpc.netty.NettyChannelBuilder;
+import io.grpc.stub.ClientCallStreamObserver;
+import io.grpc.stub.ClientCalls;
+import io.grpc.stub.ClientResponseObserver;
+import io.grpc.stub.StreamObserver;
+import io.netty.channel.EventLoopGroup;
+import io.netty.channel.ServerChannel;
+import io.netty.handler.ssl.SslContextBuilder;
+import io.netty.handler.ssl.util.InsecureTrustManagerFactory;
+
+/**
+ * Client for Flight services.
+ */
+public class FlightClient implements AutoCloseable {
+ private static final int PENDING_REQUESTS = 5;
+ /** The maximum number of trace events to keep on the gRPC Channel. This value disables channel tracing. */
+ private static final int MAX_CHANNEL_TRACE_EVENTS = 0;
+ private final BufferAllocator allocator;
+ private final ManagedChannel channel;
+ private final Channel interceptedChannel;
+ private final FlightServiceBlockingStub blockingStub;
+ private final FlightServiceStub asyncStub;
+ private final ClientAuthInterceptor authInterceptor = new ClientAuthInterceptor();
+ private final MethodDescriptor<Flight.Ticket, ArrowMessage> doGetDescriptor;
+ private final MethodDescriptor<ArrowMessage, Flight.PutResult> doPutDescriptor;
+ private final MethodDescriptor<ArrowMessage, ArrowMessage> doExchangeDescriptor;
+ private final List<FlightClientMiddleware.Factory> middleware;
+
+ /**
+ * Create a Flight client from an allocator and a gRPC channel.
+ */
+ FlightClient(BufferAllocator incomingAllocator, ManagedChannel channel,
+ List<FlightClientMiddleware.Factory> middleware) {
+ this.allocator = incomingAllocator.newChildAllocator("flight-client", 0, Long.MAX_VALUE);
+ this.channel = channel;
+ this.middleware = middleware;
+
+ final ClientInterceptor[] interceptors;
+ interceptors = new ClientInterceptor[]{authInterceptor, new ClientInterceptorAdapter(middleware)};
+
+ // Create a channel with interceptors pre-applied for DoGet and DoPut
+ this.interceptedChannel = ClientInterceptors.intercept(channel, interceptors);
+
+ blockingStub = FlightServiceGrpc.newBlockingStub(interceptedChannel);
+ asyncStub = FlightServiceGrpc.newStub(interceptedChannel);
+ doGetDescriptor = FlightBindingService.getDoGetDescriptor(allocator);
+ doPutDescriptor = FlightBindingService.getDoPutDescriptor(allocator);
+ doExchangeDescriptor = FlightBindingService.getDoExchangeDescriptor(allocator);
+ }
+
+ /**
+ * Get a list of available flights.
+ *
+ * @param criteria Criteria for selecting flights
+ * @param options RPC-layer hints for the call.
+ * @return FlightInfo Iterable
+ */
+ public Iterable<FlightInfo> listFlights(Criteria criteria, CallOption... options) {
+ final Iterator<Flight.FlightInfo> flights;
+ try {
+ flights = CallOptions.wrapStub(blockingStub, options)
+ .listFlights(criteria.asCriteria());
+ } catch (StatusRuntimeException sre) {
+ throw StatusUtils.fromGrpcRuntimeException(sre);
+ }
+ return () -> StatusUtils.wrapIterator(flights, t -> {
+ try {
+ return new FlightInfo(t);
+ } catch (URISyntaxException e) {
+ // We don't expect this will happen for conforming Flight implementations. For instance, a Java server
+ // itself wouldn't be able to construct an invalid Location.
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ /**
+ * Lists actions available on the Flight service.
+ *
+ * @param options RPC-layer hints for the call.
+ */
+ public Iterable<ActionType> listActions(CallOption... options) {
+ final Iterator<Flight.ActionType> actions;
+ try {
+ actions = CallOptions.wrapStub(blockingStub, options)
+ .listActions(Empty.getDefaultInstance());
+ } catch (StatusRuntimeException sre) {
+ throw StatusUtils.fromGrpcRuntimeException(sre);
+ }
+ return () -> StatusUtils.wrapIterator(actions, ActionType::new);
+ }
+
+ /**
+ * Performs an action on the Flight service.
+ *
+ * @param action The action to perform.
+ * @param options RPC-layer hints for this call.
+ * @return An iterator of results.
+ */
+ public Iterator<Result> doAction(Action action, CallOption... options) {
+ return StatusUtils
+ .wrapIterator(CallOptions.wrapStub(blockingStub, options).doAction(action.toProtocol()), Result::new);
+ }
+
+ /**
+ * Authenticates with a username and password.
+ */
+ public void authenticateBasic(String username, String password) {
+ BasicClientAuthHandler basicClient = new BasicClientAuthHandler(username, password);
+ authenticate(basicClient);
+ }
+
+ /**
+ * Authenticates against the Flight service.
+ *
+ * @param options RPC-layer hints for this call.
+ * @param handler The auth mechanism to use.
+ */
+ public void authenticate(ClientAuthHandler handler, CallOption... options) {
+ Preconditions.checkArgument(!authInterceptor.hasAuthHandler(), "Auth already completed.");
+ ClientAuthWrapper.doClientAuth(handler, CallOptions.wrapStub(asyncStub, options));
+ authInterceptor.setAuthHandler(handler);
+ }
+
+ /**
+ * Authenticates with a username and password.
+ *
+ * @param username the username.
+ * @param password the password.
+ * @return a CredentialCallOption containing a bearer token if the server emitted one, or
+ * empty if no bearer token was returned. This can be used in subsequent API calls.
+ */
+ public Optional<CredentialCallOption> authenticateBasicToken(String username, String password) {
+ final ClientIncomingAuthHeaderMiddleware.Factory clientAuthMiddleware =
+ new ClientIncomingAuthHeaderMiddleware.Factory(new ClientBearerHeaderHandler());
+ middleware.add(clientAuthMiddleware);
+ handshake(new CredentialCallOption(new BasicAuthCredentialWriter(username, password)));
+
+ return Optional.ofNullable(clientAuthMiddleware.getCredentialCallOption());
+ }
+
+ /**
+ * Executes the handshake against the Flight service.
+ *
+ * @param options RPC-layer hints for this call.
+ */
+ public void handshake(CallOption... options) {
+ ClientHandshakeWrapper.doClientHandshake(CallOptions.wrapStub(asyncStub, options));
+ }
+
+ /**
+ * Create or append a descriptor with another stream.
+ *
+ * @param descriptor FlightDescriptor the descriptor for the data
+ * @param root VectorSchemaRoot the root containing data
+ * @param metadataListener A handler for metadata messages from the server. This will be passed buffers that will be
+ * freed after {@link StreamListener#onNext(Object)} is called!
+ * @param options RPC-layer hints for this call.
+ * @return ClientStreamListener an interface to control uploading data
+ */
+ public ClientStreamListener startPut(FlightDescriptor descriptor, VectorSchemaRoot root,
+ PutListener metadataListener, CallOption... options) {
+ return startPut(descriptor, root, new MapDictionaryProvider(), metadataListener, options);
+ }
+
+ /**
+ * Create or append a descriptor with another stream.
+ * @param descriptor FlightDescriptor the descriptor for the data
+ * @param root VectorSchemaRoot the root containing data
+ * @param metadataListener A handler for metadata messages from the server.
+ * @param options RPC-layer hints for this call.
+ * @return ClientStreamListener an interface to control uploading data.
+ * {@link ClientStreamListener#start(VectorSchemaRoot, DictionaryProvider)} will already have been called.
+ */
+ public ClientStreamListener startPut(FlightDescriptor descriptor, VectorSchemaRoot root, DictionaryProvider provider,
+ PutListener metadataListener, CallOption... options) {
+ Preconditions.checkNotNull(root, "root must not be null");
+ Preconditions.checkNotNull(provider, "provider must not be null");
+ final ClientStreamListener writer = startPut(descriptor, metadataListener, options);
+ writer.start(root, provider);
+ return writer;
+ }
+
+ /**
+ * Create or append a descriptor with another stream.
+ * @param descriptor FlightDescriptor the descriptor for the data
+ * @param metadataListener A handler for metadata messages from the server.
+ * @param options RPC-layer hints for this call.
+ * @return ClientStreamListener an interface to control uploading data.
+ * {@link ClientStreamListener#start(VectorSchemaRoot, DictionaryProvider)} will NOT already have been called.
+ */
+ public ClientStreamListener startPut(FlightDescriptor descriptor, PutListener metadataListener,
+ CallOption... options) {
+ Preconditions.checkNotNull(descriptor, "descriptor must not be null");
+ Preconditions.checkNotNull(metadataListener, "metadataListener must not be null");
+ final io.grpc.CallOptions callOptions = CallOptions.wrapStub(asyncStub, options).getCallOptions();
+
+ try {
+ final SetStreamObserver resultObserver = new SetStreamObserver(allocator, metadataListener);
+ ClientCallStreamObserver<ArrowMessage> observer = (ClientCallStreamObserver<ArrowMessage>)
+ ClientCalls.asyncBidiStreamingCall(
+ interceptedChannel.newCall(doPutDescriptor, callOptions), resultObserver);
+ return new PutObserver(
+ descriptor, observer, metadataListener::isCancelled, metadataListener::getResult);
+ } catch (StatusRuntimeException sre) {
+ throw StatusUtils.fromGrpcRuntimeException(sre);
+ }
+ }
+
+ /**
+ * Get info on a stream.
+ * @param descriptor The descriptor for the stream.
+ * @param options RPC-layer hints for this call.
+ */
+ public FlightInfo getInfo(FlightDescriptor descriptor, CallOption... options) {
+ try {
+ return new FlightInfo(CallOptions.wrapStub(blockingStub, options).getFlightInfo(descriptor.toProtocol()));
+ } catch (URISyntaxException e) {
+ // We don't expect this will happen for conforming Flight implementations. For instance, a Java server
+ // itself wouldn't be able to construct an invalid Location.
+ throw new RuntimeException(e);
+ } catch (StatusRuntimeException sre) {
+ throw StatusUtils.fromGrpcRuntimeException(sre);
+ }
+ }
+
+ /**
+ * Get schema for a stream.
+ * @param descriptor The descriptor for the stream.
+ * @param options RPC-layer hints for this call.
+ */
+ public SchemaResult getSchema(FlightDescriptor descriptor, CallOption... options) {
+ return SchemaResult.fromProtocol(CallOptions.wrapStub(blockingStub, options).getSchema(descriptor.toProtocol()));
+ }
+
+ /**
+ * Retrieve a stream from the server.
+ * @param ticket The ticket granting access to the data stream.
+ * @param options RPC-layer hints for this call.
+ */
+ public FlightStream getStream(Ticket ticket, CallOption... options) {
+ final io.grpc.CallOptions callOptions = CallOptions.wrapStub(asyncStub, options).getCallOptions();
+ ClientCall<Flight.Ticket, ArrowMessage> call = interceptedChannel.newCall(doGetDescriptor, callOptions);
+ FlightStream stream = new FlightStream(
+ allocator,
+ PENDING_REQUESTS,
+ (String message, Throwable cause) -> call.cancel(message, cause),
+ (count) -> call.request(count));
+
+ final StreamObserver<ArrowMessage> delegate = stream.asObserver();
+ ClientResponseObserver<Flight.Ticket, ArrowMessage> clientResponseObserver =
+ new ClientResponseObserver<Flight.Ticket, ArrowMessage>() {
+
+ @Override
+ public void beforeStart(ClientCallStreamObserver<org.apache.arrow.flight.impl.Flight.Ticket> requestStream) {
+ requestStream.disableAutoInboundFlowControl();
+ }
+
+ @Override
+ public void onNext(ArrowMessage value) {
+ delegate.onNext(value);
+ }
+
+ @Override
+ public void onError(Throwable t) {
+ delegate.onError(StatusUtils.toGrpcException(t));
+ }
+
+ @Override
+ public void onCompleted() {
+ delegate.onCompleted();
+ }
+
+ };
+
+ ClientCalls.asyncServerStreamingCall(call, ticket.toProtocol(), clientResponseObserver);
+ return stream;
+ }
+
+ /**
+ * Initiate a bidirectional data exchange with the server.
+ *
+ * @param descriptor A descriptor for the data stream.
+ * @param options RPC call options.
+ * @return A pair of a readable stream and a writable stream.
+ */
+ public ExchangeReaderWriter doExchange(FlightDescriptor descriptor, CallOption... options) {
+ Preconditions.checkNotNull(descriptor, "descriptor must not be null");
+ final io.grpc.CallOptions callOptions = CallOptions.wrapStub(asyncStub, options).getCallOptions();
+
+ try {
+ final ClientCall<ArrowMessage, ArrowMessage> call = interceptedChannel.newCall(doExchangeDescriptor, callOptions);
+ final FlightStream stream = new FlightStream(allocator, PENDING_REQUESTS, call::cancel, call::request);
+ final ClientCallStreamObserver<ArrowMessage> observer = (ClientCallStreamObserver<ArrowMessage>)
+ ClientCalls.asyncBidiStreamingCall(call, stream.asObserver());
+ final ClientStreamListener writer = new PutObserver(
+ descriptor, observer, stream.cancelled::isDone,
+ () -> {
+ try {
+ stream.completed.get();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw CallStatus.INTERNAL
+ .withDescription("Client error: interrupted while completing call")
+ .withCause(e)
+ .toRuntimeException();
+ } catch (ExecutionException e) {
+ throw CallStatus.INTERNAL
+ .withDescription("Client error: internal while completing call")
+ .withCause(e)
+ .toRuntimeException();
+ }
+ });
+ // Send the descriptor to start.
+ try (final ArrowMessage message = new ArrowMessage(descriptor.toProtocol())) {
+ observer.onNext(message);
+ } catch (Exception e) {
+ throw CallStatus.INTERNAL
+ .withCause(e)
+ .withDescription("Could not write descriptor " + descriptor)
+ .toRuntimeException();
+ }
+ return new ExchangeReaderWriter(stream, writer);
+ } catch (StatusRuntimeException sre) {
+ throw StatusUtils.fromGrpcRuntimeException(sre);
+ }
+ }
+
+ /** A pair of a reader and a writer for a DoExchange call. */
+ public static class ExchangeReaderWriter implements AutoCloseable {
+ private final FlightStream reader;
+ private final ClientStreamListener writer;
+
+ ExchangeReaderWriter(FlightStream reader, ClientStreamListener writer) {
+ this.reader = reader;
+ this.writer = writer;
+ }
+
+ /** Get the reader for the call. */
+ public FlightStream getReader() {
+ return reader;
+ }
+
+ /** Get the writer for the call. */
+ public ClientStreamListener getWriter() {
+ return writer;
+ }
+
+ /** Shut down the streams in this call. */
+ @Override
+ public void close() throws Exception {
+ reader.close();
+ }
+ }
+
+ /**
+ * A stream observer for Flight.PutResult
+ */
+ private static class SetStreamObserver implements StreamObserver<Flight.PutResult> {
+ private final BufferAllocator allocator;
+ private final StreamListener<PutResult> listener;
+
+ SetStreamObserver(BufferAllocator allocator, StreamListener<PutResult> listener) {
+ super();
+ this.allocator = allocator;
+ this.listener = listener == null ? NoOpStreamListener.getInstance() : listener;
+ }
+
+ @Override
+ public void onNext(Flight.PutResult value) {
+ try (final PutResult message = PutResult.fromProtocol(allocator, value)) {
+ listener.onNext(message);
+ }
+ }
+
+ @Override
+ public void onError(Throwable t) {
+ listener.onError(StatusUtils.fromThrowable(t));
+ }
+
+ @Override
+ public void onCompleted() {
+ listener.onCompleted();
+ }
+ }
+
+ /**
+ * The implementation of a {@link ClientStreamListener} for writing data to a Flight server.
+ */
+ static class PutObserver extends OutboundStreamListenerImpl implements ClientStreamListener {
+ private final BooleanSupplier isCancelled;
+ private final Runnable getResult;
+
+ /**
+ * Create a new client stream listener.
+ *
+ * @param descriptor The descriptor for the stream.
+ * @param observer The write-side gRPC StreamObserver.
+ * @param isCancelled A flag to check if the call has been cancelled.
+ * @param getResult A flag that blocks until the overall call completes.
+ */
+ PutObserver(FlightDescriptor descriptor, ClientCallStreamObserver<ArrowMessage> observer,
+ BooleanSupplier isCancelled, Runnable getResult) {
+ super(descriptor, observer);
+ Preconditions.checkNotNull(descriptor, "descriptor must be provided");
+ Preconditions.checkNotNull(isCancelled, "isCancelled must be provided");
+ Preconditions.checkNotNull(getResult, "getResult must be provided");
+ this.isCancelled = isCancelled;
+ this.getResult = getResult;
+ this.unloader = null;
+ }
+
+ @Override
+ protected void waitUntilStreamReady() {
+ // Check isCancelled as well to avoid inadvertently blocking forever
+ // (so long as PutListener properly implements it)
+ while (!responseObserver.isReady() && !isCancelled.getAsBoolean()) {
+ /* busy wait */
+ }
+ }
+
+ @Override
+ public void getResult() {
+ getResult.run();
+ }
+ }
+
+ /**
+ * Interface for writers to an Arrow data stream.
+ */
+ public interface ClientStreamListener extends OutboundStreamListener {
+
+ /**
+ * Wait for the stream to finish on the server side. You must call this to be notified of any errors that may have
+ * happened during the upload.
+ */
+ void getResult();
+ }
+
+ /**
+ * A handler for server-sent application metadata messages during a Flight DoPut operation.
+ *
+ * <p>Generally, instead of implementing this yourself, you should use {@link AsyncPutListener} or {@link
+ * SyncPutListener}.
+ */
+ public interface PutListener extends StreamListener<PutResult> {
+
+ /**
+ * Wait for the stream to finish on the server side. You must call this to be notified of any errors that may have
+ * happened during the upload.
+ */
+ void getResult();
+
+ /**
+ * Called when a message from the server is received.
+ *
+ * @param val The application metadata. This buffer will be reclaimed once onNext returns; you must retain a
+ * reference to use it outside this method.
+ */
+ @Override
+ void onNext(PutResult val);
+
+ /**
+ * Check if the call has been cancelled.
+ *
+ * <p>By default, this always returns false. Implementations should provide an appropriate implementation, as
+ * otherwise, a DoPut operation may inadvertently block forever.
+ */
+ default boolean isCancelled() {
+ return false;
+ }
+ }
+
+ /**
+ * Shut down this client.
+ */
+ public void close() throws InterruptedException {
+ channel.shutdown().awaitTermination(5, TimeUnit.SECONDS);
+ allocator.close();
+ }
+
+ /**
+ * Create a builder for a Flight client.
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ /**
+ * Create a builder for a Flight client.
+ * @param allocator The allocator to use for the client.
+ * @param location The location to connect to.
+ */
+ public static Builder builder(BufferAllocator allocator, Location location) {
+ return new Builder(allocator, location);
+ }
+
+ /**
+ * A builder for Flight clients.
+ */
+ public static final class Builder {
+ private BufferAllocator allocator;
+ private Location location;
+ private boolean forceTls = false;
+ private int maxInboundMessageSize = FlightServer.MAX_GRPC_MESSAGE_SIZE;
+ private InputStream trustedCertificates = null;
+ private InputStream clientCertificate = null;
+ private InputStream clientKey = null;
+ private String overrideHostname = null;
+ private List<FlightClientMiddleware.Factory> middleware = new ArrayList<>();
+ private boolean verifyServer = true;
+
+ private Builder() {
+ }
+
+ private Builder(BufferAllocator allocator, Location location) {
+ this.allocator = Preconditions.checkNotNull(allocator);
+ this.location = Preconditions.checkNotNull(location);
+ }
+
+ /**
+ * Force the client to connect over TLS.
+ */
+ public Builder useTls() {
+ this.forceTls = true;
+ return this;
+ }
+
+ /** Override the hostname checked for TLS. Use with caution in production. */
+ public Builder overrideHostname(final String hostname) {
+ this.overrideHostname = hostname;
+ return this;
+ }
+
+ /** Set the maximum inbound message size. */
+ public Builder maxInboundMessageSize(int maxSize) {
+ Preconditions.checkArgument(maxSize > 0);
+ this.maxInboundMessageSize = maxSize;
+ return this;
+ }
+
+ /** Set the trusted TLS certificates. */
+ public Builder trustedCertificates(final InputStream stream) {
+ this.trustedCertificates = Preconditions.checkNotNull(stream);
+ return this;
+ }
+
+ /** Set the trusted TLS certificates. */
+ public Builder clientCertificate(final InputStream clientCertificate, final InputStream clientKey) {
+ Preconditions.checkNotNull(clientKey);
+ this.clientCertificate = Preconditions.checkNotNull(clientCertificate);
+ this.clientKey = Preconditions.checkNotNull(clientKey);
+ return this;
+ }
+
+ public Builder allocator(BufferAllocator allocator) {
+ this.allocator = Preconditions.checkNotNull(allocator);
+ return this;
+ }
+
+ public Builder location(Location location) {
+ this.location = Preconditions.checkNotNull(location);
+ return this;
+ }
+
+ public Builder intercept(FlightClientMiddleware.Factory factory) {
+ middleware.add(factory);
+ return this;
+ }
+
+ public Builder verifyServer(boolean verifyServer) {
+ this.verifyServer = verifyServer;
+ return this;
+ }
+
+ /**
+ * Create the client from this builder.
+ */
+ public FlightClient build() {
+ final NettyChannelBuilder builder;
+
+ switch (location.getUri().getScheme()) {
+ case LocationSchemes.GRPC:
+ case LocationSchemes.GRPC_INSECURE:
+ case LocationSchemes.GRPC_TLS: {
+ builder = NettyChannelBuilder.forAddress(location.toSocketAddress());
+ break;
+ }
+ case LocationSchemes.GRPC_DOMAIN_SOCKET: {
+ // The implementation is platform-specific, so we have to find the classes at runtime
+ builder = NettyChannelBuilder.forAddress(location.toSocketAddress());
+ try {
+ try {
+ // Linux
+ builder.channelType(
+ (Class<? extends ServerChannel>) Class.forName("io.netty.channel.epoll.EpollDomainSocketChannel"));
+ final EventLoopGroup elg = (EventLoopGroup) Class.forName("io.netty.channel.epoll.EpollEventLoopGroup")
+ .newInstance();
+ builder.eventLoopGroup(elg);
+ } catch (ClassNotFoundException e) {
+ // BSD
+ builder.channelType(
+ (Class<? extends ServerChannel>) Class.forName("io.netty.channel.kqueue.KQueueDomainSocketChannel"));
+ final EventLoopGroup elg = (EventLoopGroup) Class.forName("io.netty.channel.kqueue.KQueueEventLoopGroup")
+ .newInstance();
+ builder.eventLoopGroup(elg);
+ }
+ } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
+ throw new UnsupportedOperationException(
+ "Could not find suitable Netty native transport implementation for domain socket address.");
+ }
+ break;
+ }
+ default:
+ throw new IllegalArgumentException("Scheme is not supported: " + location.getUri().getScheme());
+ }
+
+ if (this.forceTls || LocationSchemes.GRPC_TLS.equals(location.getUri().getScheme())) {
+ builder.useTransportSecurity();
+
+ final boolean hasTrustedCerts = this.trustedCertificates != null;
+ final boolean hasKeyCertPair = this.clientCertificate != null && this.clientKey != null;
+ if (!this.verifyServer && (hasTrustedCerts || hasKeyCertPair)) {
+ throw new IllegalArgumentException("FlightClient has been configured to disable server verification, " +
+ "but certificate options have been specified.");
+ }
+
+ final SslContextBuilder sslContextBuilder = GrpcSslContexts.forClient();
+
+ if (!this.verifyServer) {
+ sslContextBuilder.trustManager(InsecureTrustManagerFactory.INSTANCE);
+ } else if (this.trustedCertificates != null || this.clientCertificate != null || this.clientKey != null) {
+ if (this.trustedCertificates != null) {
+ sslContextBuilder.trustManager(this.trustedCertificates);
+ }
+ if (this.clientCertificate != null && this.clientKey != null) {
+ sslContextBuilder.keyManager(this.clientCertificate, this.clientKey);
+ }
+ }
+ try {
+ builder.sslContext(sslContextBuilder.build());
+ } catch (SSLException e) {
+ throw new RuntimeException(e);
+ }
+
+ if (this.overrideHostname != null) {
+ builder.overrideAuthority(this.overrideHostname);
+ }
+ } else {
+ builder.usePlaintext();
+ }
+
+ builder
+ .maxTraceEvents(MAX_CHANNEL_TRACE_EVENTS)
+ .maxInboundMessageSize(maxInboundMessageSize);
+ return new FlightClient(allocator, builder.build(), middleware);
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClientMiddleware.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClientMiddleware.java
new file mode 100644
index 000000000..1528ca6c6
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClientMiddleware.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+/**
+ * Client-side middleware for Flight.
+ *
+ * <p>Middleware are instantiated per-call and should store state in the middleware instance.
+ */
+public interface FlightClientMiddleware {
+ /**
+ * A callback used before request headers are sent. The headers may be manipulated.
+ */
+ void onBeforeSendingHeaders(CallHeaders outgoingHeaders);
+
+ /**
+ * A callback called after response headers are received. The headers may be manipulated.
+ */
+ void onHeadersReceived(CallHeaders incomingHeaders);
+
+ /**
+ * A callback called after the call completes.
+ */
+ void onCallCompleted(CallStatus status);
+
+ /**
+ * A factory for client middleware instances.
+ */
+ interface Factory {
+ /**
+ * Create a new middleware instance for the given call.
+ *
+ * @throws FlightRuntimeException if the middleware wants to reject the call with the given status
+ */
+ FlightClientMiddleware onCallStarted(CallInfo info);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightConstants.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightConstants.java
new file mode 100644
index 000000000..2d039c9d2
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightConstants.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+/**
+ * String constants relevant to flight implementations.
+ */
+public interface FlightConstants {
+
+ String SERVICE = "arrow.flight.protocol.FlightService";
+
+ FlightServerMiddleware.Key<ServerHeaderMiddleware> HEADER_KEY =
+ FlightServerMiddleware.Key.of("org.apache.arrow.flight.ServerHeaderMiddleware");
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightDescriptor.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightDescriptor.java
new file mode 100644
index 000000000..3eff011d9
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightDescriptor.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.flight.impl.Flight.FlightDescriptor.DescriptorType;
+import org.apache.arrow.util.Preconditions;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import com.google.protobuf.ByteString;
+
+/**
+ * An identifier for a particular set of data. This can either be an opaque command that generates
+ * the data or a static "path" to the data. This is a POJO wrapper around the protobuf message with
+ * the same name.
+ */
+public class FlightDescriptor {
+
+ private boolean isCmd;
+ private List<String> path;
+ private byte[] cmd;
+
+ private FlightDescriptor(boolean isCmd, List<String> path, byte[] cmd) {
+ super();
+ this.isCmd = isCmd;
+ this.path = path;
+ this.cmd = cmd;
+ }
+
+ public static FlightDescriptor command(byte[] cmd) {
+ return new FlightDescriptor(true, null, cmd);
+ }
+
+ public static FlightDescriptor path(Iterable<String> path) {
+ return new FlightDescriptor(false, ImmutableList.copyOf(path), null);
+ }
+
+ public static FlightDescriptor path(String...path) {
+ return new FlightDescriptor(false, ImmutableList.copyOf(path), null);
+ }
+
+ FlightDescriptor(Flight.FlightDescriptor descriptor) {
+ if (descriptor.getType() == DescriptorType.CMD) {
+ isCmd = true;
+ cmd = descriptor.getCmd().toByteArray();
+ } else if (descriptor.getType() == DescriptorType.PATH) {
+ isCmd = false;
+ path = descriptor.getPathList();
+ } else {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ public boolean isCommand() {
+ return isCmd;
+ }
+
+ public List<String> getPath() {
+ Preconditions.checkArgument(!isCmd);
+ return path;
+ }
+
+ public byte[] getCommand() {
+ Preconditions.checkArgument(isCmd);
+ return cmd;
+ }
+
+ Flight.FlightDescriptor toProtocol() {
+ Flight.FlightDescriptor.Builder b = Flight.FlightDescriptor.newBuilder();
+
+ if (isCmd) {
+ return b.setType(DescriptorType.CMD).setCmd(ByteString.copyFrom(cmd)).build();
+ }
+ return b.setType(DescriptorType.PATH).addAllPath(path).build();
+ }
+
+ /**
+ * Get the serialized form of this protocol message.
+ *
+ * <p>Intended to help interoperability by allowing non-Flight services to still return Flight types.
+ */
+ public ByteBuffer serialize() {
+ return ByteBuffer.wrap(toProtocol().toByteArray());
+ }
+
+ /**
+ * Parse the serialized form of this protocol message.
+ *
+ * <p>Intended to help interoperability by allowing Flight clients to obtain stream info from non-Flight services.
+ *
+ * @param serialized The serialized form of the FlightDescriptor, as returned by {@link #serialize()}.
+ * @return The deserialized FlightDescriptor.
+ * @throws IOException if the serialized form is invalid.
+ */
+ public static FlightDescriptor deserialize(ByteBuffer serialized) throws IOException {
+ return new FlightDescriptor(Flight.FlightDescriptor.parseFrom(serialized));
+ }
+
+ @Override
+ public String toString() {
+ if (isCmd) {
+ return toHex(cmd);
+ } else {
+ return Joiner.on('.').join(path);
+ }
+ }
+
+ private String toHex(byte[] bytes) {
+ StringBuilder sb = new StringBuilder();
+ for (byte b : bytes) {
+ sb.append(String.format("%02X ", b));
+ }
+ return sb.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((cmd == null) ? 0 : Arrays.hashCode(cmd));
+ result = prime * result + (isCmd ? 1231 : 1237);
+ result = prime * result + ((path == null) ? 0 : path.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ FlightDescriptor other = (FlightDescriptor) obj;
+ if (cmd == null) {
+ if (other.cmd != null) {
+ return false;
+ }
+ } else if (!Arrays.equals(cmd, other.cmd)) {
+ return false;
+ }
+ if (isCmd != other.isCmd) {
+ return false;
+ }
+ if (path == null) {
+ if (other.path != null) {
+ return false;
+ }
+ } else if (!path.equals(other.path)) {
+ return false;
+ }
+ return true;
+ }
+
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightEndpoint.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightEndpoint.java
new file mode 100644
index 000000000..2e46b694d
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightEndpoint.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.arrow.flight.impl.Flight;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * POJO to convert to/from the underlying protobuf FlightEndpoint.
+ */
+public class FlightEndpoint {
+ private List<Location> locations;
+ private Ticket ticket;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param ticket A ticket that describe the key of a data stream.
+ * @param locations The possible locations the stream can be retrieved from.
+ */
+ public FlightEndpoint(Ticket ticket, Location... locations) {
+ super();
+ Objects.requireNonNull(ticket);
+ this.locations = ImmutableList.copyOf(locations);
+ this.ticket = ticket;
+ }
+
+ /**
+ * Constructs from the protocol buffer representation.
+ */
+ FlightEndpoint(Flight.FlightEndpoint flt) throws URISyntaxException {
+ locations = new ArrayList<>();
+ for (final Flight.Location location : flt.getLocationList()) {
+ locations.add(new Location(location.getUri()));
+ }
+ ticket = new Ticket(flt.getTicket());
+ }
+
+ public List<Location> getLocations() {
+ return locations;
+ }
+
+ public Ticket getTicket() {
+ return ticket;
+ }
+
+ /**
+ * Converts to the protocol buffer representation.
+ */
+ Flight.FlightEndpoint toProtocol() {
+ Flight.FlightEndpoint.Builder b = Flight.FlightEndpoint.newBuilder()
+ .setTicket(ticket.toProtocol());
+
+ for (Location l : locations) {
+ b.addLocation(l.toProtocol());
+ }
+ return b.build();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ FlightEndpoint that = (FlightEndpoint) o;
+ return locations.equals(that.locations) &&
+ ticket.equals(that.ticket);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(locations, ticket);
+ }
+
+ @Override
+ public String toString() {
+ return "FlightEndpoint{" +
+ "locations=" + locations +
+ ", ticket=" + ticket +
+ '}';
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java
new file mode 100644
index 000000000..e57b311c2
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.vector.ipc.ReadChannel;
+import org.apache.arrow.vector.ipc.WriteChannel;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+
+import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream;
+import com.google.common.collect.ImmutableList;
+import com.google.protobuf.ByteString;
+
+/**
+ * A POJO representation of a FlightInfo, metadata associated with a set of data records.
+ */
+public class FlightInfo {
+ private final Schema schema;
+ private final FlightDescriptor descriptor;
+ private final List<FlightEndpoint> endpoints;
+ private final long bytes;
+ private final long records;
+ private final IpcOption option;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param schema The schema of the Flight
+ * @param descriptor An identifier for the Flight.
+ * @param endpoints A list of endpoints that have the flight available.
+ * @param bytes The number of bytes in the flight
+ * @param records The number of records in the flight.
+ */
+ public FlightInfo(Schema schema, FlightDescriptor descriptor, List<FlightEndpoint> endpoints, long bytes,
+ long records) {
+ this(schema, descriptor, endpoints, bytes, records, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param schema The schema of the Flight
+ * @param descriptor An identifier for the Flight.
+ * @param endpoints A list of endpoints that have the flight available.
+ * @param bytes The number of bytes in the flight
+ * @param records The number of records in the flight.
+ * @param option IPC write options.
+ */
+ public FlightInfo(Schema schema, FlightDescriptor descriptor, List<FlightEndpoint> endpoints, long bytes,
+ long records, IpcOption option) {
+ Objects.requireNonNull(schema);
+ Objects.requireNonNull(descriptor);
+ Objects.requireNonNull(endpoints);
+ MetadataV4UnionChecker.checkForUnion(schema.getFields().iterator(), option.metadataVersion);
+ this.schema = schema;
+ this.descriptor = descriptor;
+ this.endpoints = endpoints;
+ this.bytes = bytes;
+ this.records = records;
+ this.option = option;
+ }
+
+ /**
+ * Constructs from the protocol buffer representation.
+ */
+ FlightInfo(Flight.FlightInfo pbFlightInfo) throws URISyntaxException {
+ try {
+ final ByteBuffer schemaBuf = pbFlightInfo.getSchema().asReadOnlyByteBuffer();
+ schema = pbFlightInfo.getSchema().size() > 0 ?
+ MessageSerializer.deserializeSchema(
+ new ReadChannel(Channels.newChannel(new ByteBufferBackedInputStream(schemaBuf))))
+ : new Schema(ImmutableList.of());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ descriptor = new FlightDescriptor(pbFlightInfo.getFlightDescriptor());
+ endpoints = new ArrayList<>();
+ for (final Flight.FlightEndpoint endpoint : pbFlightInfo.getEndpointList()) {
+ endpoints.add(new FlightEndpoint(endpoint));
+ }
+ bytes = pbFlightInfo.getTotalBytes();
+ records = pbFlightInfo.getTotalRecords();
+ option = IpcOption.DEFAULT;
+ }
+
+ public Schema getSchema() {
+ return schema;
+ }
+
+ public long getBytes() {
+ return bytes;
+ }
+
+ public long getRecords() {
+ return records;
+ }
+
+ public FlightDescriptor getDescriptor() {
+ return descriptor;
+ }
+
+ public List<FlightEndpoint> getEndpoints() {
+ return endpoints;
+ }
+
+ /**
+ * Converts to the protocol buffer representation.
+ */
+ Flight.FlightInfo toProtocol() {
+ // Encode schema in a Message payload
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try {
+ MessageSerializer.serialize(new WriteChannel(Channels.newChannel(baos)), schema, option);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return Flight.FlightInfo.newBuilder()
+ .addAllEndpoint(endpoints.stream().map(t -> t.toProtocol()).collect(Collectors.toList()))
+ .setSchema(ByteString.copyFrom(baos.toByteArray()))
+ .setFlightDescriptor(descriptor.toProtocol())
+ .setTotalBytes(FlightInfo.this.bytes)
+ .setTotalRecords(records)
+ .build();
+ }
+
+ /**
+ * Get the serialized form of this protocol message.
+ *
+ * <p>Intended to help interoperability by allowing non-Flight services to still return Flight types.
+ */
+ public ByteBuffer serialize() {
+ return ByteBuffer.wrap(toProtocol().toByteArray());
+ }
+
+ /**
+ * Parse the serialized form of this protocol message.
+ *
+ * <p>Intended to help interoperability by allowing Flight clients to obtain stream info from non-Flight services.
+ *
+ * @param serialized The serialized form of the FlightInfo, as returned by {@link #serialize()}.
+ * @return The deserialized FlightInfo.
+ * @throws IOException if the serialized form is invalid.
+ * @throws URISyntaxException if the serialized form contains an unsupported URI format.
+ */
+ public static FlightInfo deserialize(ByteBuffer serialized) throws IOException, URISyntaxException {
+ return new FlightInfo(Flight.FlightInfo.parseFrom(serialized));
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ FlightInfo that = (FlightInfo) o;
+ return bytes == that.bytes &&
+ records == that.records &&
+ schema.equals(that.schema) &&
+ descriptor.equals(that.descriptor) &&
+ endpoints.equals(that.endpoints);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(schema, descriptor, endpoints, bytes, records);
+ }
+
+ @Override
+ public String toString() {
+ return "FlightInfo{" +
+ "schema=" + schema +
+ ", descriptor=" + descriptor +
+ ", endpoints=" + endpoints +
+ ", bytes=" + bytes +
+ ", records=" + records +
+ '}';
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightMethod.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightMethod.java
new file mode 100644
index 000000000..5d2915bb6
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightMethod.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.flight.impl.FlightServiceGrpc;
+
+/**
+ * All the RPC methods available in Flight.
+ */
+public enum FlightMethod {
+ HANDSHAKE,
+ LIST_FLIGHTS,
+ GET_FLIGHT_INFO,
+ GET_SCHEMA,
+ DO_GET,
+ DO_PUT,
+ DO_ACTION,
+ LIST_ACTIONS,
+ DO_EXCHANGE,
+ ;
+
+ /**
+ * Convert a method name string into a {@link FlightMethod}.
+ *
+ * @throws IllegalArgumentException if the method name is not valid.
+ */
+ public static FlightMethod fromProtocol(final String methodName) {
+ if (FlightServiceGrpc.getHandshakeMethod().getFullMethodName().equals(methodName)) {
+ return HANDSHAKE;
+ } else if (FlightServiceGrpc.getListFlightsMethod().getFullMethodName().equals(methodName)) {
+ return LIST_FLIGHTS;
+ } else if (FlightServiceGrpc.getGetFlightInfoMethod().getFullMethodName().equals(methodName)) {
+ return GET_FLIGHT_INFO;
+ } else if (FlightServiceGrpc.getGetSchemaMethod().getFullMethodName().equals(methodName)) {
+ return GET_SCHEMA;
+ } else if (FlightServiceGrpc.getDoGetMethod().getFullMethodName().equals(methodName)) {
+ return DO_GET;
+ } else if (FlightServiceGrpc.getDoPutMethod().getFullMethodName().equals(methodName)) {
+ return DO_PUT;
+ } else if (FlightServiceGrpc.getDoActionMethod().getFullMethodName().equals(methodName)) {
+ return DO_ACTION;
+ } else if (FlightServiceGrpc.getListActionsMethod().getFullMethodName().equals(methodName)) {
+ return LIST_ACTIONS;
+ } else if (FlightServiceGrpc.getDoExchangeMethod().getFullMethodName().equals(methodName)) {
+ return DO_EXCHANGE;
+ }
+ throw new IllegalArgumentException("Not a Flight method name in gRPC: " + methodName);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightProducer.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightProducer.java
new file mode 100644
index 000000000..5e5b26505
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightProducer.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Map;
+
+/**
+ * API to Implement an Arrow Flight producer.
+ */
+public interface FlightProducer {
+
+ /**
+ * Return data for a stream.
+ *
+ * @param context Per-call context.
+ * @param ticket The application-defined ticket identifying this stream.
+ * @param listener An interface for sending data back to the client.
+ */
+ void getStream(CallContext context, Ticket ticket, ServerStreamListener listener);
+
+ /**
+ * List available data streams on this service.
+ *
+ * @param context Per-call context.
+ * @param criteria Application-defined criteria for filtering streams.
+ * @param listener An interface for sending data back to the client.
+ */
+ void listFlights(CallContext context, Criteria criteria,
+ StreamListener<FlightInfo> listener);
+
+ /**
+ * Get information about a particular data stream.
+ *
+ * @param context Per-call context.
+ * @param descriptor The descriptor identifying the data stream.
+ * @return Metadata about the stream.
+ */
+ FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor);
+
+ /**
+ * Get schema for a particular data stream.
+ *
+ * @param context Per-call context.
+ * @param descriptor The descriptor identifying the data stream.
+ * @return Schema for the stream.
+ */
+ default SchemaResult getSchema(CallContext context, FlightDescriptor descriptor) {
+ FlightInfo info = getFlightInfo(context, descriptor);
+ return new SchemaResult(info.getSchema());
+ }
+
+
+ /**
+ * Accept uploaded data for a particular stream.
+ *
+ * @param context Per-call context.
+ * @param flightStream The data stream being uploaded.
+ */
+ Runnable acceptPut(CallContext context,
+ FlightStream flightStream, StreamListener<PutResult> ackStream);
+
+ default void doExchange(CallContext context, FlightStream reader, ServerStreamListener writer) {
+ throw CallStatus.UNIMPLEMENTED.withDescription("DoExchange is unimplemented").toRuntimeException();
+ }
+
+ /**
+ * Generic handler for application-defined RPCs.
+ *
+ * @param context Per-call context.
+ * @param action Client-supplied parameters.
+ * @param listener A stream of responses.
+ */
+ void doAction(CallContext context, Action action,
+ StreamListener<Result> listener);
+
+ /**
+ * List available application-defined RPCs.
+ * @param context Per-call context.
+ * @param listener An interface for sending data back to the client.
+ */
+ void listActions(CallContext context, StreamListener<ActionType> listener);
+
+ /**
+ * An interface for sending Arrow data back to a client.
+ */
+ interface ServerStreamListener extends OutboundStreamListener {
+
+ /**
+ * Check whether the call has been cancelled. If so, stop sending data.
+ */
+ boolean isCancelled();
+
+ /**
+ * Set a callback for when the client cancels a call, i.e. {@link #isCancelled()} has become true.
+ *
+ * <p>Note that this callback may only be called some time after {@link #isCancelled()} becomes true, and may never
+ * be called if all executor threads on the server are busy, or the RPC method body is implemented in a blocking
+ * fashion.
+ */
+ void setOnCancelHandler(Runnable handler);
+ }
+
+ /**
+ * Callbacks for pushing objects to a receiver.
+ *
+ * @param <T> Type of the values in the stream.
+ */
+ interface StreamListener<T> {
+
+ /**
+ * Send the next value to the client.
+ */
+ void onNext(T val);
+
+ /**
+ * Indicate an error to the client.
+ *
+ * <p>Terminates the stream; do not call {@link #onCompleted()}.
+ */
+ void onError(Throwable t);
+
+ /**
+ * Indicate that the transmission is finished.
+ */
+ void onCompleted();
+
+ }
+
+ /**
+ * Call-specific context.
+ */
+ interface CallContext {
+ /** The identity of the authenticated peer. May be the empty string if unknown. */
+ String peerIdentity();
+
+ /** Whether the call has been cancelled by the client. */
+ boolean isCancelled();
+
+ /**
+ * Get the middleware instance of the given type for this call.
+ *
+ * <p>Returns null if not found.
+ */
+ <T extends FlightServerMiddleware> T getMiddleware(FlightServerMiddleware.Key<T> key);
+
+ /** Get an immutable map of middleware for this call. */
+ Map<FlightServerMiddleware.Key<?>, FlightServerMiddleware> getMiddleware();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightRuntimeException.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightRuntimeException.java
new file mode 100644
index 000000000..76d3349a2
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightRuntimeException.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+/**
+ * An exception raised from a Flight RPC.
+ *
+ * <p>In service implementations, raising an instance of this exception will provide clients with a more detailed
+ * message and error code.
+ */
+public class FlightRuntimeException extends RuntimeException {
+ private final CallStatus status;
+
+ /**
+ * Create a new exception from the given status.
+ */
+ FlightRuntimeException(CallStatus status) {
+ super(status.description(), status.cause());
+ this.status = status;
+ }
+
+ public CallStatus status() {
+ return status;
+ }
+
+ @Override
+ public String toString() {
+ String s = getClass().getName();
+ return String.format("%s: %s: %s", s, status.code(), status.description());
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java
new file mode 100644
index 000000000..d59480bfb
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java
@@ -0,0 +1,399 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.auth.ServerAuthHandler;
+import org.apache.arrow.flight.auth.ServerAuthInterceptor;
+import org.apache.arrow.flight.auth2.Auth2Constants;
+import org.apache.arrow.flight.auth2.CallHeaderAuthenticator;
+import org.apache.arrow.flight.auth2.ServerCallHeaderAuthMiddleware;
+import org.apache.arrow.flight.grpc.ServerInterceptorAdapter;
+import org.apache.arrow.flight.grpc.ServerInterceptorAdapter.KeyFactory;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.util.VisibleForTesting;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
+import io.grpc.Server;
+import io.grpc.ServerInterceptors;
+import io.grpc.netty.NettyServerBuilder;
+import io.netty.channel.EventLoopGroup;
+import io.netty.channel.ServerChannel;
+
+/**
+ * Generic server of flight data that is customized via construction with delegate classes for the
+ * actual logic. The server currently uses GRPC as its transport mechanism.
+ */
+public class FlightServer implements AutoCloseable {
+
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FlightServer.class);
+
+ private final Location location;
+ private final Server server;
+ // The executor used by the gRPC server. We don't use it here, but we do need to clean it up with the server.
+ // May be null, if a user-supplied executor was provided (as we do not want to clean that up)
+ @VisibleForTesting
+ final ExecutorService grpcExecutor;
+
+ /** The maximum size of an individual gRPC message. This effectively disables the limit. */
+ static final int MAX_GRPC_MESSAGE_SIZE = Integer.MAX_VALUE;
+
+ /** Create a new instance from a gRPC server. For internal use only. */
+ private FlightServer(Location location, Server server, ExecutorService grpcExecutor) {
+ this.location = location;
+ this.server = server;
+ this.grpcExecutor = grpcExecutor;
+ }
+
+ /** Start the server. */
+ public FlightServer start() throws IOException {
+ server.start();
+ return this;
+ }
+
+ /** Get the port the server is running on (if applicable). */
+ public int getPort() {
+ return server.getPort();
+ }
+
+ /** Get the location for this server. */
+ public Location getLocation() {
+ if (location.getUri().getPort() == 0) {
+ // If the server was bound to port 0, replace the port in the location with the real port.
+ final URI uri = location.getUri();
+ try {
+ return new Location(new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), getPort(),
+ uri.getPath(), uri.getQuery(), uri.getFragment()));
+ } catch (URISyntaxException e) {
+ // We don't expect this to happen
+ throw new RuntimeException(e);
+ }
+ }
+ return location;
+ }
+
+ /** Block until the server shuts down. */
+ public void awaitTermination() throws InterruptedException {
+ server.awaitTermination();
+ }
+
+ /** Request that the server shut down. */
+ public void shutdown() {
+ server.shutdown();
+ if (grpcExecutor != null) {
+ grpcExecutor.shutdown();
+ }
+ }
+
+ /**
+ * Wait for the server to shut down with a timeout.
+ * @return true if the server shut down successfully.
+ */
+ public boolean awaitTermination(final long timeout, final TimeUnit unit) throws InterruptedException {
+ return server.awaitTermination(timeout, unit);
+ }
+
+ /** Shutdown the server, waits for up to 6 seconds for successful shutdown before returning. */
+ public void close() throws InterruptedException {
+ shutdown();
+ final boolean terminated = awaitTermination(3000, TimeUnit.MILLISECONDS);
+ if (terminated) {
+ logger.debug("Server was terminated within 3s");
+ return;
+ }
+
+ // get more aggressive in termination.
+ server.shutdownNow();
+
+ int count = 0;
+ while (!server.isTerminated() & count < 30) {
+ count++;
+ logger.debug("Waiting for termination");
+ Thread.sleep(100);
+ }
+
+ if (!server.isTerminated()) {
+ logger.warn("Couldn't shutdown server, resources likely will be leaked.");
+ }
+ }
+
+ /** Create a builder for a Flight server. */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ /** Create a builder for a Flight server. */
+ public static Builder builder(BufferAllocator allocator, Location location, FlightProducer producer) {
+ return new Builder(allocator, location, producer);
+ }
+
+ /** A builder for Flight servers. */
+ public static final class Builder {
+ private BufferAllocator allocator;
+ private Location location;
+ private FlightProducer producer;
+ private final Map<String, Object> builderOptions;
+ private ServerAuthHandler authHandler = ServerAuthHandler.NO_OP;
+ private CallHeaderAuthenticator headerAuthenticator = CallHeaderAuthenticator.NO_OP;
+ private ExecutorService executor = null;
+ private int maxInboundMessageSize = MAX_GRPC_MESSAGE_SIZE;
+ private InputStream certChain;
+ private InputStream key;
+ private final List<KeyFactory<?>> interceptors;
+ // Keep track of inserted interceptors
+ private final Set<String> interceptorKeys;
+
+ Builder() {
+ builderOptions = new HashMap<>();
+ interceptors = new ArrayList<>();
+ interceptorKeys = new HashSet<>();
+ }
+
+ Builder(BufferAllocator allocator, Location location, FlightProducer producer) {
+ this();
+ this.allocator = Preconditions.checkNotNull(allocator);
+ this.location = Preconditions.checkNotNull(location);
+ this.producer = Preconditions.checkNotNull(producer);
+ }
+
+ /** Create the server for this builder. */
+ public FlightServer build() {
+ // Add the auth middleware if applicable.
+ if (headerAuthenticator != CallHeaderAuthenticator.NO_OP) {
+ this.middleware(FlightServerMiddleware.Key.of(Auth2Constants.AUTHORIZATION_HEADER),
+ new ServerCallHeaderAuthMiddleware.Factory(headerAuthenticator));
+ }
+
+ this.middleware(FlightConstants.HEADER_KEY, new ServerHeaderMiddleware.Factory());
+
+ final NettyServerBuilder builder;
+ switch (location.getUri().getScheme()) {
+ case LocationSchemes.GRPC_DOMAIN_SOCKET: {
+ // The implementation is platform-specific, so we have to find the classes at runtime
+ builder = NettyServerBuilder.forAddress(location.toSocketAddress());
+ try {
+ try {
+ // Linux
+ builder.channelType(
+ (Class<? extends ServerChannel>) Class
+ .forName("io.netty.channel.epoll.EpollServerDomainSocketChannel"));
+ final EventLoopGroup elg = (EventLoopGroup) Class.forName("io.netty.channel.epoll.EpollEventLoopGroup")
+ .newInstance();
+ builder.bossEventLoopGroup(elg).workerEventLoopGroup(elg);
+ } catch (ClassNotFoundException e) {
+ // BSD
+ builder.channelType(
+ (Class<? extends ServerChannel>) Class
+ .forName("io.netty.channel.kqueue.KQueueServerDomainSocketChannel"));
+ final EventLoopGroup elg = (EventLoopGroup) Class.forName("io.netty.channel.kqueue.KQueueEventLoopGroup")
+ .newInstance();
+ builder.bossEventLoopGroup(elg).workerEventLoopGroup(elg);
+ }
+ } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
+ throw new UnsupportedOperationException(
+ "Could not find suitable Netty native transport implementation for domain socket address.");
+ }
+ break;
+ }
+ case LocationSchemes.GRPC:
+ case LocationSchemes.GRPC_INSECURE: {
+ builder = NettyServerBuilder.forAddress(location.toSocketAddress());
+ break;
+ }
+ case LocationSchemes.GRPC_TLS: {
+ if (certChain == null) {
+ throw new IllegalArgumentException("Must provide a certificate and key to serve gRPC over TLS");
+ }
+ builder = NettyServerBuilder.forAddress(location.toSocketAddress());
+ break;
+ }
+ default:
+ throw new IllegalArgumentException("Scheme is not supported: " + location.getUri().getScheme());
+ }
+
+ if (certChain != null) {
+ builder.useTransportSecurity(certChain, key);
+ }
+
+ // Share one executor between the gRPC service, DoPut, and Handshake
+ final ExecutorService exec;
+ // We only want to have FlightServer close the gRPC executor if we created it here. We should not close
+ // user-supplied executors.
+ final ExecutorService grpcExecutor;
+ if (executor != null) {
+ exec = executor;
+ grpcExecutor = null;
+ } else {
+ exec = Executors.newCachedThreadPool(
+ // Name threads for better debuggability
+ new ThreadFactoryBuilder().setNameFormat("flight-server-default-executor-%d").build());
+ grpcExecutor = exec;
+ }
+ final FlightBindingService flightService = new FlightBindingService(allocator, producer, authHandler, exec);
+ builder
+ .executor(exec)
+ .maxInboundMessageSize(maxInboundMessageSize)
+ .addService(
+ ServerInterceptors.intercept(
+ flightService,
+ new ServerAuthInterceptor(authHandler)));
+
+ // Allow hooking into the gRPC builder. This is not guaranteed to be available on all Arrow versions or
+ // Flight implementations.
+ builderOptions.computeIfPresent("grpc.builderConsumer", (key, builderConsumer) -> {
+ final Consumer<NettyServerBuilder> consumer = (Consumer<NettyServerBuilder>) builderConsumer;
+ consumer.accept(builder);
+ return null;
+ });
+
+ // Allow explicitly setting some Netty-specific options
+ builderOptions.computeIfPresent("netty.channelType", (key, channelType) -> {
+ builder.channelType((Class<? extends ServerChannel>) channelType);
+ return null;
+ });
+ builderOptions.computeIfPresent("netty.bossEventLoopGroup", (key, elg) -> {
+ builder.bossEventLoopGroup((EventLoopGroup) elg);
+ return null;
+ });
+ builderOptions.computeIfPresent("netty.workerEventLoopGroup", (key, elg) -> {
+ builder.workerEventLoopGroup((EventLoopGroup) elg);
+ return null;
+ });
+
+ builder.intercept(new ServerInterceptorAdapter(interceptors));
+ return new FlightServer(location, builder.build(), grpcExecutor);
+ }
+
+ /**
+ * Set the maximum size of a message. Defaults to "unlimited", depending on the underlying transport.
+ */
+ public Builder maxInboundMessageSize(int maxMessageSize) {
+ this.maxInboundMessageSize = maxMessageSize;
+ return this;
+ }
+
+ /**
+ * Enable TLS on the server.
+ * @param certChain The certificate chain to use.
+ * @param key The private key to use.
+ */
+ public Builder useTls(final File certChain, final File key) throws IOException {
+ this.certChain = new FileInputStream(certChain);
+ this.key = new FileInputStream(key);
+ return this;
+ }
+
+ /**
+ * Enable TLS on the server.
+ * @param certChain The certificate chain to use.
+ * @param key The private key to use.
+ */
+ public Builder useTls(final InputStream certChain, final InputStream key) {
+ this.certChain = certChain;
+ this.key = key;
+ return this;
+ }
+
+ /**
+ * Set the executor used by the server.
+ *
+ * <p>Flight will NOT take ownership of the executor. The application must clean it up if one is provided. (If not
+ * provided, Flight will use a default executor which it will clean up.)
+ */
+ public Builder executor(ExecutorService executor) {
+ this.executor = executor;
+ return this;
+ }
+
+ /**
+ * Set the authentication handler.
+ */
+ public Builder authHandler(ServerAuthHandler authHandler) {
+ this.authHandler = authHandler;
+ return this;
+ }
+
+ /**
+ * Set the header-based authentication mechanism.
+ */
+ public Builder headerAuthenticator(CallHeaderAuthenticator headerAuthenticator) {
+ this.headerAuthenticator = headerAuthenticator;
+ return this;
+ }
+
+ /**
+ * Provide a transport-specific option. Not guaranteed to have any effect.
+ */
+ public Builder transportHint(final String key, Object option) {
+ builderOptions.put(key, option);
+ return this;
+ }
+
+ /**
+ * Add a Flight middleware component to inspect and modify requests to this service.
+ *
+ * @param key An identifier for this middleware component. Service implementations can retrieve the middleware
+ * instance for the current call using {@link org.apache.arrow.flight.FlightProducer.CallContext}.
+ * @param factory A factory for the middleware.
+ * @param <T> The middleware type.
+ * @throws IllegalArgumentException if the key already exists
+ */
+ public <T extends FlightServerMiddleware> Builder middleware(final FlightServerMiddleware.Key<T> key,
+ final FlightServerMiddleware.Factory<T> factory) {
+ if (interceptorKeys.contains(key.key)) {
+ throw new IllegalArgumentException("Key already exists: " + key.key);
+ }
+ interceptors.add(new KeyFactory<>(key, factory));
+ interceptorKeys.add(key.key);
+ return this;
+ }
+
+ public Builder allocator(BufferAllocator allocator) {
+ this.allocator = Preconditions.checkNotNull(allocator);
+ return this;
+ }
+
+ public Builder location(Location location) {
+ this.location = Preconditions.checkNotNull(location);
+ return this;
+ }
+
+ public Builder producer(FlightProducer producer) {
+ this.producer = Preconditions.checkNotNull(producer);
+ return this;
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServerMiddleware.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServerMiddleware.java
new file mode 100644
index 000000000..9bc8bbfe7
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServerMiddleware.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Objects;
+
+/**
+ * Server-side middleware for Flight calls.
+ *
+ * <p>Middleware are instantiated per-call.
+ *
+ * <p>Methods are not guaranteed to be called on any particular thread, relative to the thread that Flight requests are
+ * executed on. Do not depend on thread-local storage; instead, use state on the middleware instance. Service
+ * implementations may communicate with middleware implementations through
+ * {@link org.apache.arrow.flight.FlightProducer.CallContext#getMiddleware(Key)}. Methods on the middleware instance
+ * are non-reentrant, that is, a particular RPC will not make multiple concurrent calls to methods on a single
+ * middleware instance. However, methods on the factory instance are expected to be thread-safe, and if the factory
+ * instance returns the same middleware object more than once, then that middleware object must be thread-safe.
+ */
+public interface FlightServerMiddleware {
+
+ /**
+ * A factory for Flight server middleware.
+ * @param <T> The middleware type.
+ */
+ interface Factory<T extends FlightServerMiddleware> {
+ /**
+ * A callback for when the call starts.
+ *
+ * @param info Details about the call.
+ * @param incomingHeaders A mutable set of request headers.
+ * @param context Context about the current request.
+ *
+ * @throws FlightRuntimeException if the middleware wants to reject the call with the given status
+ */
+ T onCallStarted(CallInfo info, CallHeaders incomingHeaders, RequestContext context);
+ }
+
+ /**
+ * A key for Flight server middleware. On a server, middleware instances are identified by this key.
+ *
+ * <p>Keys use reference equality, so instances should be shared.
+ *
+ * @param <T> The middleware class stored in this key. This provides a compile-time check when retrieving instances.
+ */
+ class Key<T extends FlightServerMiddleware> {
+ final String key;
+
+ Key(String key) {
+ this.key = Objects.requireNonNull(key, "Key must not be null.");
+ }
+
+ /**
+ * Create a new key for the given type.
+ */
+ public static <T extends FlightServerMiddleware> Key<T> of(String key) {
+ return new Key<>(key);
+ }
+ }
+
+ /**
+ * Callback for when the underlying transport is about to send response headers.
+ *
+ * @param outgoingHeaders A mutable set of response headers. These can be manipulated to send different headers to the
+ * client.
+ */
+ void onBeforeSendingHeaders(CallHeaders outgoingHeaders);
+
+ /**
+ * Callback for when the underlying transport has completed a call.
+ * @param status Whether the call completed successfully or not.
+ */
+ void onCallCompleted(CallStatus status);
+
+ /**
+ * Callback for when an RPC method implementation throws an uncaught exception.
+ *
+ * <p>May be called multiple times, and may be called before or after {@link #onCallCompleted(CallStatus)}.
+ * Generally, an uncaught exception will end the call with a error {@link CallStatus}, and will be reported to {@link
+ * #onCallCompleted(CallStatus)}, but not necessarily this method.
+ *
+ * @param err The exception that was thrown.
+ */
+ void onCallErrored(Throwable err);
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightService.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightService.java
new file mode 100644
index 000000000..4fb0dea2c
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightService.java
@@ -0,0 +1,427 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.function.BooleanSupplier;
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.FlightProducer.ServerStreamListener;
+import org.apache.arrow.flight.FlightServerMiddleware.Key;
+import org.apache.arrow.flight.auth.AuthConstants;
+import org.apache.arrow.flight.auth.ServerAuthHandler;
+import org.apache.arrow.flight.auth.ServerAuthWrapper;
+import org.apache.arrow.flight.auth2.Auth2Constants;
+import org.apache.arrow.flight.grpc.ContextPropagatingExecutorService;
+import org.apache.arrow.flight.grpc.RequestContextAdapter;
+import org.apache.arrow.flight.grpc.ServerInterceptorAdapter;
+import org.apache.arrow.flight.grpc.StatusUtils;
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceImplBase;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Strings;
+
+import io.grpc.stub.ServerCallStreamObserver;
+import io.grpc.stub.StreamObserver;
+
+/**
+ * GRPC service implementation for a flight server.
+ */
+class FlightService extends FlightServiceImplBase {
+
+ private static final Logger logger = LoggerFactory.getLogger(FlightService.class);
+ private static final int PENDING_REQUESTS = 5;
+
+ private final BufferAllocator allocator;
+ private final FlightProducer producer;
+ private final ServerAuthHandler authHandler;
+ private final ExecutorService executors;
+
+ FlightService(BufferAllocator allocator, FlightProducer producer, ServerAuthHandler authHandler,
+ ExecutorService executors) {
+ this.allocator = allocator;
+ this.producer = producer;
+ this.authHandler = authHandler;
+ this.executors = new ContextPropagatingExecutorService(executors);
+ }
+
+ private CallContext makeContext(ServerCallStreamObserver<?> responseObserver) {
+ // Try to get the peer identity from middleware first (using the auth2 interfaces).
+ final RequestContext context = RequestContextAdapter.REQUEST_CONTEXT_KEY.get();
+ String peerIdentity = null;
+ if (context != null) {
+ peerIdentity = context.get(Auth2Constants.PEER_IDENTITY_KEY);
+ }
+
+ if (Strings.isNullOrEmpty(peerIdentity)) {
+ // Try the legacy auth interface, which defaults to empty string.
+ peerIdentity = AuthConstants.PEER_IDENTITY_KEY.get();
+ }
+
+ return new CallContext(peerIdentity, responseObserver::isCancelled);
+ }
+
+ @Override
+ public StreamObserver<Flight.HandshakeRequest> handshake(StreamObserver<Flight.HandshakeResponse> responseObserver) {
+ // This method is not meaningful with the auth2 interfaces. Authentication would already
+ // have happened by header/middleware with the auth2 classes.
+ return ServerAuthWrapper.wrapHandshake(authHandler, responseObserver, executors);
+ }
+
+ @Override
+ public void listFlights(Flight.Criteria criteria, StreamObserver<Flight.FlightInfo> responseObserver) {
+ final StreamPipe<FlightInfo, Flight.FlightInfo> listener = StreamPipe
+ .wrap(responseObserver, FlightInfo::toProtocol, this::handleExceptionWithMiddleware);
+ try {
+ final CallContext context = makeContext((ServerCallStreamObserver<?>) responseObserver);
+ producer.listFlights(context, new Criteria(criteria), listener);
+ } catch (Exception ex) {
+ listener.onError(ex);
+ }
+ // Do NOT call StreamPipe#onCompleted, as the FlightProducer implementation may be asynchronous
+ }
+
+ public void doGetCustom(Flight.Ticket ticket, StreamObserver<ArrowMessage> responseObserverSimple) {
+ final ServerCallStreamObserver<ArrowMessage> responseObserver =
+ (ServerCallStreamObserver<ArrowMessage>) responseObserverSimple;
+
+ final GetListener listener = new GetListener(responseObserver, this::handleExceptionWithMiddleware);
+ try {
+ producer.getStream(makeContext(responseObserver), new Ticket(ticket), listener);
+ } catch (Exception ex) {
+ listener.error(ex);
+ }
+ // Do NOT call GetListener#completed, as the implementation of getStream may be asynchronous
+ }
+
+ @Override
+ public void doAction(Flight.Action request, StreamObserver<Flight.Result> responseObserver) {
+ final StreamPipe<Result, Flight.Result> listener = StreamPipe
+ .wrap(responseObserver, Result::toProtocol, this::handleExceptionWithMiddleware);
+ try {
+ final CallContext context = makeContext((ServerCallStreamObserver<?>) responseObserver);
+ producer.doAction(context, new Action(request), listener);
+ } catch (Exception ex) {
+ listener.onError(ex);
+ }
+ // Do NOT call StreamPipe#onCompleted, as the FlightProducer implementation may be asynchronous
+ }
+
+ @Override
+ public void listActions(Flight.Empty request, StreamObserver<Flight.ActionType> responseObserver) {
+ final StreamPipe<org.apache.arrow.flight.ActionType, Flight.ActionType> listener = StreamPipe
+ .wrap(responseObserver, ActionType::toProtocol, this::handleExceptionWithMiddleware);
+ try {
+ final CallContext context = makeContext((ServerCallStreamObserver<?>) responseObserver);
+ producer.listActions(context, listener);
+ } catch (Exception ex) {
+ listener.onError(ex);
+ }
+ // Do NOT call StreamPipe#onCompleted, as the FlightProducer implementation may be asynchronous
+ }
+
+ private static class GetListener extends OutboundStreamListenerImpl implements ServerStreamListener {
+ private ServerCallStreamObserver<ArrowMessage> responseObserver;
+ private final Consumer<Throwable> errorHandler;
+ private Runnable onCancelHandler = null;
+ private Runnable onReadyHandler = null;
+ private boolean completed;
+
+ public GetListener(ServerCallStreamObserver<ArrowMessage> responseObserver, Consumer<Throwable> errorHandler) {
+ super(null, responseObserver);
+ this.errorHandler = errorHandler;
+ this.completed = false;
+ this.responseObserver = responseObserver;
+ this.responseObserver.setOnCancelHandler(this::onCancel);
+ this.responseObserver.setOnReadyHandler(this::onReady);
+ this.responseObserver.disableAutoInboundFlowControl();
+ }
+
+ private void onCancel() {
+ logger.debug("Stream cancelled by client.");
+ if (onCancelHandler != null) {
+ onCancelHandler.run();
+ }
+ }
+
+ private void onReady() {
+ if (onReadyHandler != null) {
+ onReadyHandler.run();
+ }
+ }
+
+ @Override
+ public void setOnCancelHandler(Runnable handler) {
+ this.onCancelHandler = handler;
+ }
+
+ @Override
+ public void setOnReadyHandler(Runnable handler) {
+ this.onReadyHandler = handler;
+ }
+
+ @Override
+ public boolean isCancelled() {
+ return responseObserver.isCancelled();
+ }
+
+ @Override
+ protected void waitUntilStreamReady() {
+ // Don't do anything - service implementations are expected to manage backpressure themselves
+ }
+
+ @Override
+ public void error(Throwable ex) {
+ if (!completed) {
+ completed = true;
+ super.error(ex);
+ } else {
+ errorHandler.accept(ex);
+ }
+ }
+
+ @Override
+ public void completed() {
+ if (!completed) {
+ completed = true;
+ super.completed();
+ } else {
+ errorHandler.accept(new IllegalStateException("Tried to complete already-completed call"));
+ }
+ }
+ }
+
+ public StreamObserver<ArrowMessage> doPutCustom(final StreamObserver<Flight.PutResult> responseObserverSimple) {
+ ServerCallStreamObserver<Flight.PutResult> responseObserver =
+ (ServerCallStreamObserver<Flight.PutResult>) responseObserverSimple;
+ responseObserver.disableAutoInboundFlowControl();
+ responseObserver.request(1);
+
+ final StreamPipe<PutResult, Flight.PutResult> ackStream = StreamPipe
+ .wrap(responseObserver, PutResult::toProtocol, this::handleExceptionWithMiddleware);
+ final FlightStream fs = new FlightStream(
+ allocator,
+ PENDING_REQUESTS,
+ /* server-upload streams are not cancellable */null,
+ responseObserver::request);
+ // When the ackStream is completed, the FlightStream will be closed with it
+ ackStream.setAutoCloseable(fs);
+ final StreamObserver<ArrowMessage> observer = fs.asObserver();
+ executors.submit(() -> {
+ try {
+ producer.acceptPut(makeContext(responseObserver), fs, ackStream).run();
+ } catch (Exception ex) {
+ ackStream.onError(ex);
+ } finally {
+ // ARROW-6136: Close the stream if and only if acceptPut hasn't closed it itself
+ // We don't do this for other streams since the implementation may be asynchronous
+ ackStream.ensureCompleted();
+ }
+ });
+
+ return observer;
+ }
+
+ @Override
+ public void getFlightInfo(Flight.FlightDescriptor request, StreamObserver<Flight.FlightInfo> responseObserver) {
+ final FlightInfo info;
+ try {
+ info = producer
+ .getFlightInfo(makeContext((ServerCallStreamObserver<?>) responseObserver), new FlightDescriptor(request));
+ } catch (Exception ex) {
+ // Don't capture exceptions from onNext or onCompleted with this block - because then we can't call onError
+ responseObserver.onError(StatusUtils.toGrpcException(ex));
+ return;
+ }
+ responseObserver.onNext(info.toProtocol());
+ responseObserver.onCompleted();
+ }
+
+ /**
+ * Broadcast the given exception to all registered middleware.
+ */
+ private void handleExceptionWithMiddleware(Throwable t) {
+ final Map<Key<?>, FlightServerMiddleware> middleware = ServerInterceptorAdapter.SERVER_MIDDLEWARE_KEY.get();
+ if (middleware == null || middleware.isEmpty()) {
+ logger.error("Uncaught exception in Flight method body", t);
+ return;
+ }
+ middleware.forEach((k, v) -> v.onCallErrored(t));
+ }
+
+ @Override
+ public void getSchema(Flight.FlightDescriptor request, StreamObserver<Flight.SchemaResult> responseObserver) {
+ try {
+ SchemaResult result = producer
+ .getSchema(makeContext((ServerCallStreamObserver<?>) responseObserver),
+ new FlightDescriptor(request));
+ responseObserver.onNext(result.toProtocol());
+ responseObserver.onCompleted();
+ } catch (Exception ex) {
+ responseObserver.onError(StatusUtils.toGrpcException(ex));
+ }
+ }
+
+ /** Ensures that other resources are cleaned up when the service finishes its call. */
+ private static class ExchangeListener extends GetListener {
+
+ private AutoCloseable resource;
+ private boolean closed = false;
+ private Runnable onCancelHandler = null;
+
+ public ExchangeListener(ServerCallStreamObserver<ArrowMessage> responseObserver, Consumer<Throwable> errorHandler) {
+ super(responseObserver, errorHandler);
+ this.resource = null;
+ super.setOnCancelHandler(() -> {
+ try {
+ if (onCancelHandler != null) {
+ onCancelHandler.run();
+ }
+ } finally {
+ cleanup();
+ }
+ });
+ }
+
+ private void cleanup() {
+ if (closed) {
+ // Prevent double-free. gRPC will call the OnCancelHandler even on a normal call end, which means that
+ // we'll double-free without this guard.
+ return;
+ }
+ closed = true;
+ try {
+ AutoCloseables.close(resource);
+ } catch (Exception e) {
+ throw CallStatus.INTERNAL
+ .withCause(e)
+ .withDescription("Server internal error cleaning up resources")
+ .toRuntimeException();
+ }
+ }
+
+ @Override
+ public void error(Throwable ex) {
+ try {
+ this.cleanup();
+ } finally {
+ super.error(ex);
+ }
+ }
+
+ @Override
+ public void completed() {
+ try {
+ this.cleanup();
+ } finally {
+ super.completed();
+ }
+ }
+
+ @Override
+ public void setOnCancelHandler(Runnable handler) {
+ onCancelHandler = handler;
+ }
+ }
+
+ public StreamObserver<ArrowMessage> doExchangeCustom(StreamObserver<ArrowMessage> responseObserverSimple) {
+ final ServerCallStreamObserver<ArrowMessage> responseObserver =
+ (ServerCallStreamObserver<ArrowMessage>) responseObserverSimple;
+ final ExchangeListener listener = new ExchangeListener(
+ responseObserver,
+ this::handleExceptionWithMiddleware);
+ final FlightStream fs = new FlightStream(
+ allocator,
+ PENDING_REQUESTS,
+ /* server-upload streams are not cancellable */null,
+ responseObserver::request);
+ // When service completes the call, this cleans up the FlightStream
+ listener.resource = fs;
+ responseObserver.disableAutoInboundFlowControl();
+ responseObserver.request(1);
+ final StreamObserver<ArrowMessage> observer = fs.asObserver();
+ try {
+ executors.submit(() -> {
+ try {
+ producer.doExchange(makeContext(responseObserver), fs, listener);
+ } catch (Exception ex) {
+ listener.error(ex);
+ }
+ // We do not clean up or close anything here, to allow long-running asynchronous implementations.
+ // It is the service's responsibility to call completed() or error(), which will then clean up the FlightStream.
+ });
+ } catch (Exception ex) {
+ listener.error(ex);
+ }
+ return observer;
+ }
+
+ /**
+ * Call context for the service.
+ */
+ static class CallContext implements FlightProducer.CallContext {
+
+ private final String peerIdentity;
+ private final BooleanSupplier isCancelled;
+
+ CallContext(final String peerIdentity, BooleanSupplier isCancelled) {
+ this.peerIdentity = peerIdentity;
+ this.isCancelled = isCancelled;
+ }
+
+ @Override
+ public String peerIdentity() {
+ return peerIdentity;
+ }
+
+ @Override
+ public boolean isCancelled() {
+ return this.isCancelled.getAsBoolean();
+ }
+
+ @Override
+ public <T extends FlightServerMiddleware> T getMiddleware(Key<T> key) {
+ final Map<Key<?>, FlightServerMiddleware> middleware = ServerInterceptorAdapter.SERVER_MIDDLEWARE_KEY.get();
+ if (middleware == null) {
+ return null;
+ }
+ final FlightServerMiddleware m = middleware.get(key);
+ if (m == null) {
+ return null;
+ }
+ @SuppressWarnings("unchecked") final T result = (T) m;
+ return result;
+ }
+
+ @Override
+ public Map<Key<?>, FlightServerMiddleware> getMiddleware() {
+ final Map<Key<?>, FlightServerMiddleware> middleware = ServerInterceptorAdapter.SERVER_MIDDLEWARE_KEY.get();
+ if (middleware == null) {
+ return Collections.emptyMap();
+ }
+ // This is an unmodifiable map
+ return middleware;
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java
new file mode 100644
index 000000000..3d96877ba
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+/**
+ * A status code describing the result of a Flight call.
+ */
+public enum FlightStatusCode {
+ /**
+ * The call completed successfully. Generally clients will not see this, but middleware may.
+ */
+ OK,
+ /**
+ * An unknown error occurred. This may also be the result of an implementation error on the server-side; by default,
+ * unhandled server exceptions result in this code.
+ */
+ UNKNOWN,
+ /**
+ * An internal/implementation error occurred.
+ */
+ INTERNAL,
+ /**
+ * One or more of the given arguments was invalid.
+ */
+ INVALID_ARGUMENT,
+ /**
+ * The operation timed out.
+ */
+ TIMED_OUT,
+ /**
+ * The operation describes a resource that does not exist.
+ */
+ NOT_FOUND,
+ /**
+ * The operation creates a resource that already exists.
+ */
+ ALREADY_EXISTS,
+ /**
+ * The operation was cancelled.
+ */
+ CANCELLED,
+ /**
+ * The client was not authenticated.
+ */
+ UNAUTHENTICATED,
+ /**
+ * The client did not have permission to make the call.
+ */
+ UNAUTHORIZED,
+ /**
+ * The requested operation is not implemented.
+ */
+ UNIMPLEMENTED,
+ /**
+ * The server cannot currently handle the request. This should be used for retriable requests, i.e. the server
+ * should send this code only if it has not done any work.
+ */
+ UNAVAILABLE,
+ ;
+
+ /**
+ * Create a blank {@link CallStatus} with this code.
+ */
+ public CallStatus toStatus() {
+ return new CallStatus(this);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java
new file mode 100644
index 000000000..03ce13c97
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import org.apache.arrow.flight.ArrowMessage.HeaderType;
+import org.apache.arrow.flight.grpc.StatusUtils;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+
+import com.google.common.util.concurrent.SettableFuture;
+
+import io.grpc.stub.StreamObserver;
+
+/**
+ * An adaptor between protobuf streams and flight data streams.
+ */
+public class FlightStream implements AutoCloseable {
+ // Use AutoCloseable sentinel objects to simplify logic in #close
+ private final AutoCloseable DONE = () -> {
+ };
+ private final AutoCloseable DONE_EX = () -> {
+ };
+
+ private final BufferAllocator allocator;
+ private final Cancellable cancellable;
+ private final LinkedBlockingQueue<AutoCloseable> queue = new LinkedBlockingQueue<>();
+ private final SettableFuture<VectorSchemaRoot> root = SettableFuture.create();
+ private final SettableFuture<FlightDescriptor> descriptor = SettableFuture.create();
+ private final int pendingTarget;
+ private final Requestor requestor;
+ // The completion flags.
+ // This flag is only updated as the user iterates through the data, i.e. it tracks whether the user has read all the
+ // data and closed the stream
+ final CompletableFuture<Void> completed;
+ // This flag is immediately updated when gRPC signals that the server has ended the call. This is used to make sure
+ // we don't block forever trying to write to a server that has rejected a call.
+ final CompletableFuture<Void> cancelled;
+
+ private volatile int pending = 1;
+ private volatile VectorSchemaRoot fulfilledRoot;
+ private DictionaryProvider.MapDictionaryProvider dictionaries;
+ private volatile VectorLoader loader;
+ private volatile Throwable ex;
+ private volatile ArrowBuf applicationMetadata = null;
+ @VisibleForTesting
+ volatile MetadataVersion metadataVersion = null;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param allocator The allocator to use for creating/reallocating buffers for Vectors.
+ * @param pendingTarget Target number of messages to receive.
+ * @param cancellable Used to cancel mid-stream requests.
+ * @param requestor A callback to determine how many pending items there are.
+ */
+ public FlightStream(BufferAllocator allocator, int pendingTarget, Cancellable cancellable, Requestor requestor) {
+ Objects.requireNonNull(allocator);
+ Objects.requireNonNull(requestor);
+ this.allocator = allocator;
+ this.pendingTarget = pendingTarget;
+ this.cancellable = cancellable;
+ this.requestor = requestor;
+ this.dictionaries = new DictionaryProvider.MapDictionaryProvider();
+ this.completed = new CompletableFuture<>();
+ this.cancelled = new CompletableFuture<>();
+ }
+
+ /**
+ * Get the schema for this stream. Blocks until the schema is available.
+ */
+ public Schema getSchema() {
+ return getRoot().getSchema();
+ }
+
+ /**
+ * Get the provider for dictionaries in this stream.
+ *
+ * <p>Does NOT retain a reference to the underlying dictionaries. Dictionaries may be updated as the stream is read.
+ * This method is intended for stream processing, where the application code will not retain references to values
+ * after the stream is closed.
+ *
+ * @throws IllegalStateException if {@link #takeDictionaryOwnership()} was called
+ * @see #takeDictionaryOwnership()
+ */
+ public DictionaryProvider getDictionaryProvider() {
+ if (dictionaries == null) {
+ throw new IllegalStateException("Dictionary ownership was claimed by the application.");
+ }
+ return dictionaries;
+ }
+
+ /**
+ * Get an owned reference to the dictionaries in this stream. Should be called after finishing reading the stream,
+ * but before closing.
+ *
+ * <p>If called, the client is responsible for closing the dictionaries in this provider. Can only be called once.
+ *
+ * @return The dictionary provider for the stream.
+ * @throws IllegalStateException if called more than once.
+ */
+ public DictionaryProvider takeDictionaryOwnership() {
+ if (dictionaries == null) {
+ throw new IllegalStateException("Dictionary ownership was claimed by the application.");
+ }
+ // Swap out the provider so it is not closed
+ final DictionaryProvider provider = dictionaries;
+ dictionaries = null;
+ return provider;
+ }
+
+ /**
+ * Get the descriptor for this stream. Only applicable on the server side of a DoPut operation. Will block until the
+ * client sends the descriptor.
+ */
+ public FlightDescriptor getDescriptor() {
+ // This blocks until the first message from the client is received.
+ try {
+ return descriptor.get();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw CallStatus.INTERNAL.withCause(e).withDescription("Interrupted").toRuntimeException();
+ } catch (ExecutionException e) {
+ throw CallStatus.INTERNAL.withCause(e).withDescription("Error getting descriptor").toRuntimeException();
+ }
+ }
+
+ /**
+ * Closes the stream (freeing any existing resources).
+ *
+ * <p>If the stream isn't complete and is cancellable, this method will cancel and drain the stream first.
+ */
+ public void close() throws Exception {
+ final List<AutoCloseable> closeables = new ArrayList<>();
+ Throwable suppressor = null;
+ if (cancellable != null) {
+ // Client-side stream. Cancel the call, to help ensure gRPC doesn't deliver a message after close() ends.
+ // On the server side, we can't rely on draining the stream , because this gRPC bug means the completion callback
+ // may never run https://github.com/grpc/grpc-java/issues/5882
+ try {
+ synchronized (cancellable) {
+ if (!cancelled.isDone()) {
+ // Only cancel if the call is not done on the gRPC side
+ cancellable.cancel("Stream closed before end", /* no exception to report */null);
+ }
+ }
+ // Drain the stream without the lock (as next() implicitly needs the lock)
+ while (next()) { }
+ } catch (FlightRuntimeException e) {
+ suppressor = e;
+ }
+ }
+ // Perform these operations under a lock. This way the observer can't enqueue new messages while we're in the
+ // middle of cleanup. This should only be a concern for server-side streams since client-side streams are drained
+ // by the lambda above.
+ synchronized (completed) {
+ try {
+ if (fulfilledRoot != null) {
+ closeables.add(fulfilledRoot);
+ }
+ closeables.add(applicationMetadata);
+ closeables.addAll(queue);
+ if (dictionaries != null) {
+ dictionaries.getDictionaryIds().forEach(id -> closeables.add(dictionaries.lookup(id).getVector()));
+ }
+ if (suppressor != null) {
+ AutoCloseables.close(suppressor, closeables);
+ } else {
+ AutoCloseables.close(closeables);
+ }
+ } finally {
+ // The value of this CompletableFuture is meaningless, only whether it's completed (or has an exception)
+ // No-op if already complete
+ completed.complete(null);
+ }
+ }
+ }
+
+ /**
+ * Blocking request to load next item into list.
+ * @return Whether or not more data was found.
+ */
+ public boolean next() {
+ try {
+ if (completed.isDone() && queue.isEmpty()) {
+ return false;
+ }
+
+ pending--;
+ requestOutstanding();
+
+ Object data = queue.take();
+ if (DONE == data) {
+ queue.put(DONE);
+ // Other code ignores the value of this CompletableFuture, only whether it's completed (or has an exception)
+ completed.complete(null);
+ return false;
+ } else if (DONE_EX == data) {
+ queue.put(DONE_EX);
+ if (ex instanceof Exception) {
+ throw (Exception) ex;
+ } else {
+ throw new Exception(ex);
+ }
+ } else {
+ try (ArrowMessage msg = ((ArrowMessage) data)) {
+ if (msg.getMessageType() == HeaderType.NONE) {
+ updateMetadata(msg);
+ // We received a message without data, so erase any leftover data
+ if (fulfilledRoot != null) {
+ fulfilledRoot.clear();
+ }
+ } else if (msg.getMessageType() == HeaderType.RECORD_BATCH) {
+ checkMetadataVersion(msg);
+ // Ensure we have the root
+ root.get().clear();
+ try (ArrowRecordBatch arb = msg.asRecordBatch()) {
+ loader.load(arb);
+ }
+ updateMetadata(msg);
+ } else if (msg.getMessageType() == HeaderType.DICTIONARY_BATCH) {
+ checkMetadataVersion(msg);
+ // Ensure we have the root
+ root.get().clear();
+ try (ArrowDictionaryBatch arb = msg.asDictionaryBatch()) {
+ final long id = arb.getDictionaryId();
+ if (dictionaries == null) {
+ throw new IllegalStateException("Dictionary ownership was claimed by the application.");
+ }
+ final Dictionary dictionary = dictionaries.lookup(id);
+ if (dictionary == null) {
+ throw new IllegalArgumentException("Dictionary not defined in schema: ID " + id);
+ }
+
+ final FieldVector vector = dictionary.getVector();
+ final VectorSchemaRoot dictionaryRoot = new VectorSchemaRoot(Collections.singletonList(vector.getField()),
+ Collections.singletonList(vector), 0);
+ final VectorLoader dictionaryLoader = new VectorLoader(dictionaryRoot);
+ dictionaryLoader.load(arb.getDictionary());
+ }
+ return next();
+ } else {
+ throw new UnsupportedOperationException("Message type is unsupported: " + msg.getMessageType());
+ }
+ return true;
+ }
+ }
+ } catch (RuntimeException e) {
+ throw e;
+ } catch (ExecutionException e) {
+ throw StatusUtils.fromThrowable(e.getCause());
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /** Update our metadata reference with a new one from this message. */
+ private void updateMetadata(ArrowMessage msg) {
+ if (this.applicationMetadata != null) {
+ this.applicationMetadata.close();
+ }
+ this.applicationMetadata = msg.getApplicationMetadata();
+ if (this.applicationMetadata != null) {
+ this.applicationMetadata.getReferenceManager().retain();
+ }
+ }
+
+ /** Ensure the Arrow metadata version doesn't change mid-stream. */
+ private void checkMetadataVersion(ArrowMessage msg) {
+ if (msg.asSchemaMessage() == null) {
+ return;
+ }
+ MetadataVersion receivedVersion = MetadataVersion.fromFlatbufID(msg.asSchemaMessage().getMessage().version());
+ if (this.metadataVersion != receivedVersion) {
+ throw new IllegalStateException("Metadata version mismatch: stream started as " +
+ this.metadataVersion + " but got message with version " + receivedVersion);
+ }
+ }
+
+ /**
+ * Get the current vector data from the stream.
+ *
+ * <p>The data in the root may change at any time. Clients should NOT modify the root, but instead unload the data
+ * into their own root.
+ *
+ * @throws FlightRuntimeException if there was an error reading the schema from the stream.
+ */
+ public VectorSchemaRoot getRoot() {
+ try {
+ return root.get();
+ } catch (InterruptedException e) {
+ throw CallStatus.INTERNAL.withCause(e).toRuntimeException();
+ } catch (ExecutionException e) {
+ throw StatusUtils.fromThrowable(e.getCause());
+ }
+ }
+
+ /**
+ * Check if there is a root (i.e. whether the other end has started sending data).
+ *
+ * Updated by calls to {@link #next()}.
+ *
+ * @return true if and only if the other end has started sending data.
+ */
+ public boolean hasRoot() {
+ return root.isDone();
+ }
+
+ /**
+ * Get the most recent metadata sent from the server. This may be cleared by calls to {@link #next()} if the server
+ * sends a message without metadata. This does NOT take ownership of the buffer - call retain() to create a reference
+ * if you need the buffer after a call to {@link #next()}.
+ *
+ * @return the application metadata. May be null.
+ */
+ public ArrowBuf getLatestMetadata() {
+ return applicationMetadata;
+ }
+
+ private synchronized void requestOutstanding() {
+ if (pending < pendingTarget) {
+ requestor.request(pendingTarget - pending);
+ pending = pendingTarget;
+ }
+ }
+
+ private class Observer implements StreamObserver<ArrowMessage> {
+
+ Observer() {
+ super();
+ }
+
+ /** Helper to add an item to the queue under the appropriate lock. */
+ private void enqueue(AutoCloseable message) {
+ synchronized (completed) {
+ if (completed.isDone()) {
+ // The stream is already closed (RPC ended), discard the message
+ AutoCloseables.closeNoChecked(message);
+ } else {
+ queue.add(message);
+ }
+ }
+ }
+
+ @Override
+ public void onNext(ArrowMessage msg) {
+ // Operations here have to be under a lock so that we don't add a message to the queue while in the middle of
+ // close().
+ requestOutstanding();
+ switch (msg.getMessageType()) {
+ case NONE: {
+ // No IPC message - pure metadata or descriptor
+ if (msg.getDescriptor() != null) {
+ descriptor.set(new FlightDescriptor(msg.getDescriptor()));
+ }
+ if (msg.getApplicationMetadata() != null) {
+ enqueue(msg);
+ }
+ break;
+ }
+ case SCHEMA: {
+ Schema schema = msg.asSchema();
+
+ // if there is app metadata in the schema message, make sure
+ // that we don't leak it.
+ ArrowBuf meta = msg.getApplicationMetadata();
+ if (meta != null) {
+ meta.close();
+ }
+
+ final List<Field> fields = new ArrayList<>();
+ final Map<Long, Dictionary> dictionaryMap = new HashMap<>();
+ for (final Field originalField : schema.getFields()) {
+ final Field updatedField = DictionaryUtility.toMemoryFormat(originalField, allocator, dictionaryMap);
+ fields.add(updatedField);
+ }
+ for (final Map.Entry<Long, Dictionary> entry : dictionaryMap.entrySet()) {
+ dictionaries.put(entry.getValue());
+ }
+ schema = new Schema(fields, schema.getCustomMetadata());
+ metadataVersion = MetadataVersion.fromFlatbufID(msg.asSchemaMessage().getMessage().version());
+ try {
+ MetadataV4UnionChecker.checkRead(schema, metadataVersion);
+ } catch (IOException e) {
+ ex = e;
+ enqueue(DONE_EX);
+ break;
+ }
+
+ synchronized (completed) {
+ if (!completed.isDone()) {
+ fulfilledRoot = VectorSchemaRoot.create(schema, allocator);
+ loader = new VectorLoader(fulfilledRoot);
+ if (msg.getDescriptor() != null) {
+ descriptor.set(new FlightDescriptor(msg.getDescriptor()));
+ }
+ root.set(fulfilledRoot);
+ }
+ }
+ break;
+ }
+ case RECORD_BATCH:
+ case DICTIONARY_BATCH:
+ enqueue(msg);
+ break;
+ case TENSOR:
+ default:
+ ex = new UnsupportedOperationException("Unable to handle message of type: " + msg.getMessageType());
+ enqueue(DONE_EX);
+ }
+ }
+
+ @Override
+ public void onError(Throwable t) {
+ ex = StatusUtils.fromThrowable(t);
+ queue.add(DONE_EX);
+ cancelled.complete(null);
+ root.setException(ex);
+ }
+
+ @Override
+ public void onCompleted() {
+ // Depends on gRPC calling onNext and onCompleted non-concurrently
+ cancelled.complete(null);
+ queue.add(DONE);
+ }
+ }
+
+ /**
+ * Cancels sending the stream to a client.
+ *
+ * <p>Callers should drain the stream (with {@link #next()}) to ensure all messages sent before cancellation are
+ * received and to wait for the underlying transport to acknowledge cancellation.
+ */
+ public void cancel(String message, Throwable exception) {
+ if (cancellable == null) {
+ throw new UnsupportedOperationException("Streams cannot be cancelled that are produced by client. " +
+ "Instead, server should reject incoming messages.");
+ }
+ cancellable.cancel(message, exception);
+ // Do not mark the stream as completed, as gRPC may still be delivering messages.
+ }
+
+ StreamObserver<ArrowMessage> asObserver() {
+ return new Observer();
+ }
+
+ /**
+ * Provides a callback to cancel a process that is in progress.
+ */
+ @FunctionalInterface
+ public interface Cancellable {
+ void cancel(String message, Throwable exception);
+ }
+
+ /**
+ * Provides a interface to request more items from a stream producer.
+ */
+ @FunctionalInterface
+ public interface Requestor {
+ /**
+ * Requests <code>count</code> more messages from the instance of this object.
+ */
+ void request(int count);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/HeaderCallOption.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/HeaderCallOption.java
new file mode 100644
index 000000000..e2fad1a40
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/HeaderCallOption.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import io.grpc.Metadata;
+import io.grpc.stub.AbstractStub;
+import io.grpc.stub.MetadataUtils;
+
+/**
+ * Method option for supplying headers to method calls.
+ */
+public class HeaderCallOption implements CallOptions.GrpcCallOption {
+ private final Metadata propertiesMetadata = new Metadata();
+
+ /**
+ * Header property constructor.
+ *
+ * @param headers the headers that should be sent across. If a header is a string, it should only be valid ASCII
+ * characters. Binary headers should end in "-bin".
+ */
+ public HeaderCallOption(CallHeaders headers) {
+ for (String key : headers.keys()) {
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ final Metadata.Key<byte[]> metaKey = Metadata.Key.of(key, Metadata.BINARY_BYTE_MARSHALLER);
+ headers.getAllByte(key).forEach(v -> propertiesMetadata.put(metaKey, v));
+ } else {
+ final Metadata.Key<String> metaKey = Metadata.Key.of(key, Metadata.ASCII_STRING_MARSHALLER);
+ headers.getAll(key).forEach(v -> propertiesMetadata.put(metaKey, v));
+ }
+ }
+ }
+
+ @Override
+ public <T extends AbstractStub<T>> T wrapStub(T stub) {
+ return MetadataUtils.attachHeaders(stub, propertiesMetadata);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Location.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Location.java
new file mode 100644
index 000000000..1fbec7b5a
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Location.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.lang.reflect.InvocationTargetException;
+import java.net.InetSocketAddress;
+import java.net.SocketAddress;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Objects;
+
+import org.apache.arrow.flight.impl.Flight;
+
+/** A URI where a Flight stream is available. */
+public class Location {
+ private final URI uri;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param uri the URI of the Flight service
+ * @throws IllegalArgumentException if the URI scheme is unsupported
+ */
+ public Location(String uri) throws URISyntaxException {
+ this(new URI(uri));
+ }
+
+ /**
+ * Construct a new instance from an existing URI.
+ *
+ * @param uri the URI of the Flight service
+ */
+ public Location(URI uri) {
+ super();
+ Objects.requireNonNull(uri);
+ this.uri = uri;
+ }
+
+ public URI getUri() {
+ return uri;
+ }
+
+ /**
+ * Helper method to turn this Location into a SocketAddress.
+ *
+ * @return null if could not be converted
+ */
+ SocketAddress toSocketAddress() {
+ switch (uri.getScheme()) {
+ case LocationSchemes.GRPC:
+ case LocationSchemes.GRPC_TLS:
+ case LocationSchemes.GRPC_INSECURE: {
+ return new InetSocketAddress(uri.getHost(), uri.getPort());
+ }
+
+ case LocationSchemes.GRPC_DOMAIN_SOCKET: {
+ try {
+ // This dependency is not available on non-Unix platforms.
+ return (SocketAddress) Class.forName("io.netty.channel.unix.DomainSocketAddress")
+ .getConstructor(String.class)
+ .newInstance(uri.getPath());
+ } catch (InstantiationException | ClassNotFoundException | InvocationTargetException |
+ NoSuchMethodException | IllegalAccessException e) {
+ return null;
+ }
+ }
+
+ default: {
+ return null;
+ }
+ }
+ }
+
+ /**
+ * Convert this Location into its protocol-level representation.
+ */
+ Flight.Location toProtocol() {
+ return Flight.Location.newBuilder().setUri(uri.toString()).build();
+ }
+
+ /**
+ * Construct a URI for a Flight+gRPC server without transport security.
+ *
+ * @throws IllegalArgumentException if the constructed URI is invalid.
+ */
+ public static Location forGrpcInsecure(String host, int port) {
+ try {
+ return new Location(new URI(LocationSchemes.GRPC_INSECURE, null, host, port, null, null, null));
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ /**
+ * Construct a URI for a Flight+gRPC server with transport security.
+ *
+ * @throws IllegalArgumentException if the constructed URI is invalid.
+ */
+ public static Location forGrpcTls(String host, int port) {
+ try {
+ return new Location(new URI(LocationSchemes.GRPC_TLS, null, host, port, null, null, null));
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ /**
+ * Construct a URI for a Flight+gRPC server over a Unix domain socket.
+ *
+ * @throws IllegalArgumentException if the constructed URI is invalid.
+ */
+ public static Location forGrpcDomainSocket(String path) {
+ try {
+ return new Location(new URI(LocationSchemes.GRPC_DOMAIN_SOCKET, null, path, null));
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "Location{" +
+ "uri=" + uri +
+ '}';
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ Location location = (Location) o;
+ return uri.equals(location.uri);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(uri);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/LocationSchemes.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/LocationSchemes.java
new file mode 100644
index 000000000..872e5b1c2
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/LocationSchemes.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+/**
+ * Constants representing well-known URI schemes for Flight services.
+ */
+public final class LocationSchemes {
+ public static final String GRPC = "grpc";
+ public static final String GRPC_INSECURE = "grpc+tcp";
+ public static final String GRPC_DOMAIN_SOCKET = "grpc+unix";
+ public static final String GRPC_TLS = "grpc+tls";
+
+ private LocationSchemes() {
+ throw new AssertionError("Do not instantiate this class.");
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpFlightProducer.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpFlightProducer.java
new file mode 100644
index 000000000..d1432f514
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpFlightProducer.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+/**
+ * A {@link FlightProducer} that throws on all operations.
+ */
+public class NoOpFlightProducer implements FlightProducer {
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket,
+ ServerStreamListener listener) {
+ listener.error(CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException());
+ }
+
+ @Override
+ public void listFlights(CallContext context, Criteria criteria,
+ StreamListener<FlightInfo> listener) {
+ listener.onError(CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException());
+ }
+
+ @Override
+ public FlightInfo getFlightInfo(CallContext context,
+ FlightDescriptor descriptor) {
+ throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException();
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context,
+ FlightStream flightStream, StreamListener<PutResult> ackStream) {
+ throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException();
+ }
+
+ @Override
+ public void doAction(CallContext context, Action action,
+ StreamListener<Result> listener) {
+ listener.onError(CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException());
+ }
+
+ @Override
+ public void listActions(CallContext context,
+ StreamListener<ActionType> listener) {
+ listener.onError(CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException());
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpStreamListener.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpStreamListener.java
new file mode 100644
index 000000000..e06af1a10
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpStreamListener.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.flight.FlightProducer.StreamListener;
+
+/**
+ * A {@link StreamListener} that does nothing for all callbacks.
+ * @param <T> The type of the callback object.
+ */
+public class NoOpStreamListener<T> implements StreamListener<T> {
+ private static NoOpStreamListener INSTANCE = new NoOpStreamListener();
+
+ /** Ignores the value received. */
+ @Override
+ public void onNext(T val) {
+ }
+
+ /** Ignores the error received. */
+ @Override
+ public void onError(Throwable t) {
+ }
+
+ /** Ignores the stream completion event. */
+ @Override
+ public void onCompleted() {
+ }
+
+ @SuppressWarnings("unchecked")
+ public static <T> StreamListener<T> getInstance() {
+ // Safe because we never use T
+ return (StreamListener<T>) INSTANCE;
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java
new file mode 100644
index 000000000..38a44d0e5
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+
+/**
+ * An interface for writing data to a peer, client or server.
+ */
+public interface OutboundStreamListener {
+
+ /**
+ * A hint indicating whether the client is ready to receive data without excessive buffering.
+ *
+ * <p>Writers should poll this flag before sending data to respect backpressure from the client and
+ * avoid sending data faster than the client can handle. Ignoring this flag may mean that the server
+ * will start consuming excessive amounts of memory, as it may buffer messages in memory.
+ */
+ boolean isReady();
+
+ /**
+ * Set a callback for when the listener is ready for new calls to putNext(), i.e. {@link #isReady()}
+ * has become true.
+ *
+ * <p>Note that this callback may only be called some time after {@link #isReady()} becomes true, and may never
+ * be called if all executor threads on the server are busy, or the RPC method body is implemented in a blocking
+ * fashion. Note that isReady() must still be checked after the callback is run as it may have been run
+ * spuriously.
+ */
+ default void setOnReadyHandler(Runnable handler) {
+ throw new UnsupportedOperationException("Not yet implemented.");
+ }
+
+ /**
+ * Start sending data, using the schema of the given {@link VectorSchemaRoot}.
+ *
+ * <p>This method must be called before all others, except {@link #putMetadata(ArrowBuf)}.
+ */
+ default void start(VectorSchemaRoot root) {
+ start(root, null, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Start sending data, using the schema of the given {@link VectorSchemaRoot}.
+ *
+ * <p>This method must be called before all others, except {@link #putMetadata(ArrowBuf)}.
+ */
+ default void start(VectorSchemaRoot root, DictionaryProvider dictionaries) {
+ start(root, dictionaries, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Start sending data, using the schema of the given {@link VectorSchemaRoot}.
+ *
+ * <p>This method must be called before all others, except {@link #putMetadata(ArrowBuf)}.
+ */
+ void start(VectorSchemaRoot root, DictionaryProvider dictionaries, IpcOption option);
+
+ /**
+ * Send the current contents of the associated {@link VectorSchemaRoot}.
+ *
+ * <p>This will not necessarily block until the message is actually sent; it may buffer messages
+ * in memory. Use {@link #isReady()} to check if there is backpressure and avoid excessive buffering.
+ */
+ void putNext();
+
+ /**
+ * Send the current contents of the associated {@link VectorSchemaRoot} alongside application-defined metadata.
+ * @param metadata The metadata to send. Ownership of the buffer is transferred to the Flight implementation.
+ */
+ void putNext(ArrowBuf metadata);
+
+ /**
+ * Send a pure metadata message without any associated data.
+ *
+ * <p>This may be called without starting the stream.
+ */
+ void putMetadata(ArrowBuf metadata);
+
+ /**
+ * Indicate an error to the client. Terminates the stream; do not call {@link #completed()} afterwards.
+ */
+ void error(Throwable ex);
+
+ /**
+ * Indicate that transmission is finished.
+ */
+ void completed();
+
+ /**
+ * Toggle whether to ues the zero-copy write optimization.
+ *
+ * <p>By default or when disabled, Arrow may copy data into a buffer for the underlying implementation to
+ * send. When enabled, Arrow will instead try to directly enqueue the Arrow buffer for sending. Not all
+ * implementations support this optimization, so even if enabled, you may not see a difference.
+ *
+ * <p>In this mode, buffers must not be reused after they are written with {@link #putNext()}. For example,
+ * you would have to call {@link VectorSchemaRoot#allocateNew()} after every call to {@link #putNext()}.
+ * Hence, this is not enabled by default.
+ *
+ * <p>The default value can be toggled globally by setting the JVM property arrow.flight.enable_zero_copy_write
+ * or the environment variable ARROW_FLIGHT_ENABLE_ZERO_COPY_WRITE.
+ */
+ default void setUseZeroCopy(boolean enabled) {}
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListenerImpl.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListenerImpl.java
new file mode 100644
index 000000000..8c1cfde3a
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListenerImpl.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.flight.grpc.StatusUtils;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+
+import io.grpc.stub.CallStreamObserver;
+
+/**
+ * A base class for writing Arrow data to a Flight stream.
+ */
+abstract class OutboundStreamListenerImpl implements OutboundStreamListener {
+ private final FlightDescriptor descriptor; // nullable
+ protected final CallStreamObserver<ArrowMessage> responseObserver;
+ protected volatile VectorUnloader unloader; // null until stream started
+ protected IpcOption option; // null until stream started
+ protected boolean tryZeroCopy = ArrowMessage.ENABLE_ZERO_COPY_WRITE;
+
+ OutboundStreamListenerImpl(FlightDescriptor descriptor, CallStreamObserver<ArrowMessage> responseObserver) {
+ Preconditions.checkNotNull(responseObserver, "responseObserver must be provided");
+ this.descriptor = descriptor;
+ this.responseObserver = responseObserver;
+ this.unloader = null;
+ }
+
+ @Override
+ public boolean isReady() {
+ return responseObserver.isReady();
+ }
+
+ @Override
+ public void setOnReadyHandler(Runnable handler) {
+ responseObserver.setOnReadyHandler(handler);
+ }
+
+ @Override
+ public void start(VectorSchemaRoot root, DictionaryProvider dictionaries, IpcOption option) {
+ this.option = option;
+ try {
+ DictionaryUtils.generateSchemaMessages(root.getSchema(), descriptor, dictionaries, option,
+ responseObserver::onNext);
+ } catch (RuntimeException e) {
+ // Propagate runtime exceptions, like those raised when trying to write unions with V4 metadata
+ throw e;
+ } catch (Exception e) {
+ // Only happens if closing buffers somehow fails - indicates application is an unknown state so propagate
+ // the exception
+ throw new RuntimeException("Could not generate and send all schema messages", e);
+ }
+ // We include the null count and align buffers to be compatible with Flight/C++
+ unloader = new VectorUnloader(root, /* includeNullCount */ true, /* alignBuffers */ true);
+ }
+
+ @Override
+ public void putNext() {
+ putNext(null);
+ }
+
+ /**
+ * Busy-wait until the stream is ready.
+ *
+ * <p>This is overridable as client/server have different behavior.
+ */
+ protected abstract void waitUntilStreamReady();
+
+ @Override
+ public void putNext(ArrowBuf metadata) {
+ if (unloader == null) {
+ throw CallStatus.INTERNAL.withDescription("Stream was not started, call start()").toRuntimeException();
+ }
+
+ waitUntilStreamReady();
+ // close is a no-op if the message has been written to gRPC, otherwise frees the associated buffers
+ // in some code paths (e.g. if the call is cancelled), gRPC does not write the message, so we need to clean up
+ // ourselves. Normally, writing the ArrowMessage will transfer ownership of the data to gRPC/Netty.
+ try (final ArrowMessage message = new ArrowMessage(unloader.getRecordBatch(), metadata, tryZeroCopy, option)) {
+ responseObserver.onNext(message);
+ } catch (Exception e) {
+ // This exception comes from ArrowMessage#close, not responseObserver#onNext.
+ // Generally this should not happen - ArrowMessage's implementation only closes non-throwing things.
+ // The user can't reasonably do anything about this, but if something does throw, we shouldn't let
+ // execution continue since other state (e.g. allocators) may be in an odd state.
+ throw new RuntimeException("Could not free ArrowMessage", e);
+ }
+ }
+
+ @Override
+ public void putMetadata(ArrowBuf metadata) {
+ waitUntilStreamReady();
+ try (final ArrowMessage message = new ArrowMessage(metadata)) {
+ responseObserver.onNext(message);
+ } catch (Exception e) {
+ throw StatusUtils.fromThrowable(e);
+ }
+ }
+
+ @Override
+ public void error(Throwable ex) {
+ responseObserver.onError(StatusUtils.toGrpcException(ex));
+ }
+
+ @Override
+ public void completed() {
+ responseObserver.onCompleted();
+ }
+
+ @Override
+ public void setUseZeroCopy(boolean enabled) {
+ tryZeroCopy = enabled;
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PutResult.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PutResult.java
new file mode 100644
index 000000000..862401312
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PutResult.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * A message from the server during a DoPut operation.
+ *
+ * <p>This object owns an {@link ArrowBuf} and should be closed when you are done with it.
+ */
+public class PutResult implements AutoCloseable {
+
+ private ArrowBuf applicationMetadata;
+
+ private PutResult(ArrowBuf metadata) {
+ applicationMetadata = metadata;
+ }
+
+ /**
+ * Create a PutResult with application-specific metadata.
+ *
+ * <p>This method assumes ownership of the {@link ArrowBuf}.
+ */
+ public static PutResult metadata(ArrowBuf metadata) {
+ if (metadata == null) {
+ return empty();
+ }
+ return new PutResult(metadata);
+ }
+
+ /** Create an empty PutResult. */
+ public static PutResult empty() {
+ return new PutResult(null);
+ }
+
+ /**
+ * Get the metadata in this message. May be null.
+ *
+ * <p>Ownership of the {@link ArrowBuf} is retained by this object. Call {@link ReferenceManager#retain()} to preserve
+ * a reference.
+ */
+ public ArrowBuf getApplicationMetadata() {
+ return applicationMetadata;
+ }
+
+ Flight.PutResult toProtocol() {
+ if (applicationMetadata == null) {
+ return Flight.PutResult.getDefaultInstance();
+ }
+ return Flight.PutResult.newBuilder().setAppMetadata(ByteString.copyFrom(applicationMetadata.nioBuffer())).build();
+ }
+
+ /**
+ * Construct a PutResult from a Protobuf message.
+ *
+ * @param allocator The allocator to use for allocating application metadata memory. The result object owns the
+ * allocated buffer, if any.
+ * @param message The gRPC/Protobuf message.
+ */
+ static PutResult fromProtocol(BufferAllocator allocator, Flight.PutResult message) {
+ final ArrowBuf buf = allocator.buffer(message.getAppMetadata().size());
+ message.getAppMetadata().asReadOnlyByteBufferList().forEach(bb -> {
+ buf.setBytes(buf.writerIndex(), bb);
+ buf.writerIndex(buf.writerIndex() + bb.limit());
+ });
+ return new PutResult(buf);
+ }
+
+ @Override
+ public void close() {
+ if (applicationMetadata != null) {
+ applicationMetadata.close();
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RequestContext.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RequestContext.java
new file mode 100644
index 000000000..5117d05c2
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RequestContext.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Set;
+
+/**
+ * Tracks variables about the current request.
+ */
+public interface RequestContext {
+ /**
+ * Register a variable and a value.
+ * @param key the variable name.
+ * @param value the value.
+ */
+ void put(String key, String value);
+
+ /**
+ * Retrieve a registered variable.
+ * @param key the variable name.
+ * @return the value, or null if not found.
+ */
+ String get(String key);
+
+ /**
+ * Retrieves the keys that have been registered to this context.
+ * @return the keys used in this context.
+ */
+ Set<String> keySet();
+
+ /**
+ * Deletes a registered variable.
+ * @return the value associated with the deleted variable, or null if the key doesn't exist.
+ */
+ String remove(String key);
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Result.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Result.java
new file mode 100644
index 000000000..5d6ce485d
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Result.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.flight.impl.Flight;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * Opaque result returned after executing an action.
+ *
+ * <p>POJO wrapper around the Flight protocol buffer message sharing the same name.
+ */
+public class Result {
+
+ private final byte[] body;
+
+ public Result(byte[] body) {
+ this.body = body;
+ }
+
+ Result(Flight.Result result) {
+ this.body = result.getBody().toByteArray();
+ }
+
+ public byte[] getBody() {
+ return body;
+ }
+
+ Flight.Result toProtocol() {
+ return Flight.Result.newBuilder()
+ .setBody(ByteString.copyFrom(body))
+ .build();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SchemaResult.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SchemaResult.java
new file mode 100644
index 000000000..8a5e7d9a4
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SchemaResult.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.vector.ipc.ReadChannel;
+import org.apache.arrow.vector.ipc.WriteChannel;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+
+import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream;
+import com.google.common.collect.ImmutableList;
+import com.google.protobuf.ByteString;
+
+/**
+ * Opaque result returned after executing a getSchema request.
+ *
+ * <p>POJO wrapper around the Flight protocol buffer message sharing the same name.
+ */
+public class SchemaResult {
+
+ private final Schema schema;
+ private final IpcOption option;
+
+ public SchemaResult(Schema schema) {
+ this(schema, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Create a schema result with specific IPC options for serialization.
+ */
+ public SchemaResult(Schema schema, IpcOption option) {
+ MetadataV4UnionChecker.checkForUnion(schema.getFields().iterator(), option.metadataVersion);
+ this.schema = schema;
+ this.option = option;
+ }
+
+ public Schema getSchema() {
+ return schema;
+ }
+
+ /**
+ * Converts to the protocol buffer representation.
+ */
+ Flight.SchemaResult toProtocol() {
+ // Encode schema in a Message payload
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try {
+ MessageSerializer.serialize(new WriteChannel(Channels.newChannel(baos)), schema, option);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return Flight.SchemaResult.newBuilder()
+ .setSchema(ByteString.copyFrom(baos.toByteArray()))
+ .build();
+
+ }
+
+ /**
+ * Converts from the protocol buffer representation.
+ */
+ static SchemaResult fromProtocol(Flight.SchemaResult pbSchemaResult) {
+ try {
+ final ByteBuffer schemaBuf = pbSchemaResult.getSchema().asReadOnlyByteBuffer();
+ Schema schema = pbSchemaResult.getSchema().size() > 0 ?
+ MessageSerializer.deserializeSchema(
+ new ReadChannel(Channels.newChannel(new ByteBufferBackedInputStream(schemaBuf))))
+ : new Schema(ImmutableList.of());
+ return new SchemaResult(schema);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerHeaderMiddleware.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerHeaderMiddleware.java
new file mode 100644
index 000000000..527c3128c
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerHeaderMiddleware.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+/**
+ * Middleware that's used to extract and pass headers to the server during requests.
+ */
+public class ServerHeaderMiddleware implements FlightServerMiddleware {
+ /**
+ * Factory for accessing ServerHeaderMiddleware.
+ */
+ public static class Factory implements FlightServerMiddleware.Factory<ServerHeaderMiddleware> {
+ /**
+ * Construct a factory for receiving call headers.
+ */
+ public Factory() {
+ }
+
+ @Override
+ public ServerHeaderMiddleware onCallStarted(CallInfo callInfo, CallHeaders incomingHeaders,
+ RequestContext context) {
+ return new ServerHeaderMiddleware(incomingHeaders);
+ }
+ }
+
+ private final CallHeaders headers;
+
+ private ServerHeaderMiddleware(CallHeaders incomingHeaders) {
+ this.headers = incomingHeaders;
+ }
+
+ /**
+ * Retrieve the headers for this call.
+ */
+ public CallHeaders headers() {
+ return headers;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+ }
+
+ @Override
+ public void onCallErrored(Throwable err) {
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/StreamPipe.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/StreamPipe.java
new file mode 100644
index 000000000..d506914d5
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/StreamPipe.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+import org.apache.arrow.flight.FlightProducer.StreamListener;
+import org.apache.arrow.flight.grpc.StatusUtils;
+import org.apache.arrow.util.AutoCloseables;
+
+import io.grpc.stub.StreamObserver;
+
+/**
+ * Shim listener to avoid exposing GRPC internals.
+
+ * @param <FROM> From Type
+ * @param <TO> To Type
+ */
+class StreamPipe<FROM, TO> implements StreamListener<FROM> {
+
+ private final StreamObserver<TO> delegate;
+ private final Function<FROM, TO> mapFunction;
+ private final Consumer<Throwable> errorHandler;
+ private AutoCloseable resource;
+ private boolean closed = false;
+
+ /**
+ * Wrap the given gRPC StreamObserver with a transformation function.
+ *
+ * @param delegate The {@link StreamObserver} to wrap.
+ * @param func The transformation function.
+ * @param errorHandler A handler for uncaught exceptions (e.g. if something tries to double-close this stream).
+ * @param <FROM> The source type.
+ * @param <TO> The output type.
+ * @return A wrapped listener.
+ */
+ public static <FROM, TO> StreamPipe<FROM, TO> wrap(StreamObserver<TO> delegate, Function<FROM, TO> func,
+ Consumer<Throwable> errorHandler) {
+ return new StreamPipe<>(delegate, func, errorHandler);
+ }
+
+ public StreamPipe(StreamObserver<TO> delegate, Function<FROM, TO> func, Consumer<Throwable> errorHandler) {
+ super();
+ this.delegate = delegate;
+ this.mapFunction = func;
+ this.errorHandler = errorHandler;
+ this.resource = null;
+ }
+
+ /** Set an AutoCloseable resource to be cleaned up when the gRPC observer is to be completed. */
+ void setAutoCloseable(AutoCloseable ac) {
+ resource = ac;
+ }
+
+ @Override
+ public void onNext(FROM val) {
+ delegate.onNext(mapFunction.apply(val));
+ }
+
+ @Override
+ public void onError(Throwable t) {
+ if (closed) {
+ errorHandler.accept(t);
+ return;
+ }
+ try {
+ AutoCloseables.close(resource);
+ } catch (Exception e) {
+ errorHandler.accept(e);
+ } finally {
+ // Set closed to true in case onError throws, so that we don't try to close again
+ closed = true;
+ delegate.onError(StatusUtils.toGrpcException(t));
+ }
+ }
+
+ @Override
+ public void onCompleted() {
+ if (closed) {
+ errorHandler.accept(new IllegalStateException("Tried to complete already-completed call"));
+ return;
+ }
+ try {
+ AutoCloseables.close(resource);
+ } catch (Exception e) {
+ errorHandler.accept(e);
+ } finally {
+ // Set closed to true in case onCompleted throws, so that we don't try to close again
+ closed = true;
+ delegate.onCompleted();
+ }
+ }
+
+ /**
+ * Ensure this stream has been completed.
+ */
+ void ensureCompleted() {
+ if (!closed) {
+ onCompleted();
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SyncPutListener.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SyncPutListener.java
new file mode 100644
index 000000000..730cf4924
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SyncPutListener.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.flight.grpc.StatusUtils;
+import org.apache.arrow.memory.ArrowBuf;
+
+/**
+ * A listener for server-sent application metadata messages during a Flight DoPut. This class wraps the messages in a
+ * synchronous interface.
+ */
+public final class SyncPutListener implements FlightClient.PutListener, AutoCloseable {
+
+ private final LinkedBlockingQueue<Object> queue;
+ private final CompletableFuture<Void> completed;
+ private static final Object DONE = new Object();
+ private static final Object DONE_WITH_EXCEPTION = new Object();
+
+ public SyncPutListener() {
+ queue = new LinkedBlockingQueue<>();
+ completed = new CompletableFuture<>();
+ }
+
+ private PutResult unwrap(Object queueItem) throws InterruptedException, ExecutionException {
+ if (queueItem == DONE) {
+ queue.put(queueItem);
+ return null;
+ } else if (queueItem == DONE_WITH_EXCEPTION) {
+ queue.put(queueItem);
+ completed.get();
+ }
+ return (PutResult) queueItem;
+ }
+
+ /**
+ * Get the next message from the server, blocking until it is available.
+ *
+ * @return The next message, or null if the server is done sending messages. The caller assumes ownership of the
+ * metadata and must remember to close it.
+ * @throws InterruptedException if interrupted while waiting.
+ * @throws ExecutionException if the server sent an error, or if there was an internal error.
+ */
+ public PutResult read() throws InterruptedException, ExecutionException {
+ return unwrap(queue.take());
+ }
+
+ /**
+ * Get the next message from the server, blocking for the specified amount of time until it is available.
+ *
+ * @return The next message, or null if the server is done sending messages or no message arrived before the timeout.
+ * The caller assumes ownership of the metadata and must remember to close it.
+ * @throws InterruptedException if interrupted while waiting.
+ * @throws ExecutionException if the server sent an error, or if there was an internal error.
+ */
+ public PutResult poll(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
+ return unwrap(queue.poll(timeout, unit));
+ }
+
+ @Override
+ public void getResult() {
+ try {
+ completed.get();
+ } catch (ExecutionException e) {
+ throw StatusUtils.fromThrowable(e.getCause());
+ } catch (InterruptedException e) {
+ throw StatusUtils.fromThrowable(e);
+ }
+ }
+
+ @Override
+ public void onNext(PutResult val) {
+ final ArrowBuf metadata = val.getApplicationMetadata();
+ metadata.getReferenceManager().retain();
+ queue.add(PutResult.metadata(metadata));
+ }
+
+ @Override
+ public void onError(Throwable t) {
+ completed.completeExceptionally(StatusUtils.fromThrowable(t));
+ queue.add(DONE_WITH_EXCEPTION);
+ }
+
+ @Override
+ public void onCompleted() {
+ completed.complete(null);
+ queue.add(DONE);
+ }
+
+ @Override
+ public void close() {
+ queue.forEach(o -> {
+ if (o instanceof PutResult) {
+ ((PutResult) o).close();
+ }
+ });
+ }
+
+ @Override
+ public boolean isCancelled() {
+ return completed.isDone();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Ticket.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Ticket.java
new file mode 100644
index 000000000..a93cd0879
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Ticket.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import org.apache.arrow.flight.impl.Flight;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * Endpoint for a particular stream.
+ */
+public class Ticket {
+ private final byte[] bytes;
+
+ public Ticket(byte[] bytes) {
+ super();
+ this.bytes = bytes;
+ }
+
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+ Ticket(org.apache.arrow.flight.impl.Flight.Ticket ticket) {
+ this.bytes = ticket.getTicket().toByteArray();
+ }
+
+ Flight.Ticket toProtocol() {
+ return Flight.Ticket.newBuilder()
+ .setTicket(ByteString.copyFrom(bytes))
+ .build();
+ }
+
+ /**
+ * Get the serialized form of this protocol message.
+ *
+ * <p>Intended to help interoperability by allowing non-Flight services to still return Flight types.
+ */
+ public ByteBuffer serialize() {
+ return ByteBuffer.wrap(toProtocol().toByteArray());
+ }
+
+ /**
+ * Parse the serialized form of this protocol message.
+ *
+ * <p>Intended to help interoperability by allowing Flight clients to obtain stream info from non-Flight services.
+ *
+ * @param serialized The serialized form of the Ticket, as returned by {@link #serialize()}.
+ * @return The deserialized Ticket.
+ * @throws IOException if the serialized form is invalid.
+ */
+ public static Ticket deserialize(ByteBuffer serialized) throws IOException {
+ return new Ticket(Flight.Ticket.parseFrom(serialized));
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + Arrays.hashCode(bytes);
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ Ticket other = (Ticket) obj;
+ if (!Arrays.equals(bytes, other.bytes)) {
+ return false;
+ }
+ return true;
+ }
+
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java
new file mode 100644
index 000000000..ac55872e5
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import org.apache.arrow.flight.FlightConstants;
+
+import io.grpc.Context;
+import io.grpc.Metadata.BinaryMarshaller;
+import io.grpc.Metadata.Key;
+import io.grpc.MethodDescriptor;
+
+/**
+ * Constants used in authorization of flight connections.
+ */
+public final class AuthConstants {
+
+ public static final String HANDSHAKE_DESCRIPTOR_NAME = MethodDescriptor
+ .generateFullMethodName(FlightConstants.SERVICE, "Handshake");
+ public static final String TOKEN_NAME = "Auth-Token-bin";
+ public static final Key<byte[]> TOKEN_KEY = Key.of(TOKEN_NAME, new BinaryMarshaller<byte[]>() {
+
+ @Override
+ public byte[] toBytes(byte[] value) {
+ return value;
+ }
+
+ @Override
+ public byte[] parseBytes(byte[] serialized) {
+ return serialized;
+ }
+ });
+
+ public static final Context.Key<String> PEER_IDENTITY_KEY = Context.keyWithDefault("arrow-flight-peer-identity", "");
+
+ private AuthConstants() {}
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicClientAuthHandler.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicClientAuthHandler.java
new file mode 100644
index 000000000..c6dca97fb
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicClientAuthHandler.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import java.util.Iterator;
+
+import org.apache.arrow.flight.impl.Flight.BasicAuth;
+
+/**
+ * A client auth handler that supports username and password.
+ */
+public class BasicClientAuthHandler implements ClientAuthHandler {
+
+ private final String name;
+ private final String password;
+ private byte[] token = null;
+
+ public BasicClientAuthHandler(String name, String password) {
+ this.name = name;
+ this.password = password;
+ }
+
+ @Override
+ public void authenticate(ClientAuthSender outgoing, Iterator<byte[]> incoming) {
+ BasicAuth.Builder builder = BasicAuth.newBuilder();
+ if (name != null) {
+ builder.setUsername(name);
+ }
+
+ if (password != null) {
+ builder.setPassword(password);
+ }
+
+ outgoing.send(builder.build().toByteArray());
+ this.token = incoming.next();
+ }
+
+ @Override
+ public byte[] getCallToken() {
+ return token;
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicServerAuthHandler.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicServerAuthHandler.java
new file mode 100644
index 000000000..34e3efc0d
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicServerAuthHandler.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import java.util.Iterator;
+import java.util.Optional;
+
+import org.apache.arrow.flight.impl.Flight.BasicAuth;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+
+/**
+ * A ServerAuthHandler for username/password authentication.
+ */
+public class BasicServerAuthHandler implements ServerAuthHandler {
+
+ private static final Logger logger = LoggerFactory.getLogger(BasicServerAuthHandler.class);
+ private final BasicAuthValidator authValidator;
+
+ public BasicServerAuthHandler(BasicAuthValidator authValidator) {
+ super();
+ this.authValidator = authValidator;
+ }
+
+ /**
+ * Interface that this handler delegates for determining if credentials are valid.
+ */
+ public interface BasicAuthValidator {
+
+ byte[] getToken(String username, String password) throws Exception;
+
+ Optional<String> isValid(byte[] token);
+
+ }
+
+ @Override
+ public boolean authenticate(ServerAuthSender outgoing, Iterator<byte[]> incoming) {
+ byte[] bytes = incoming.next();
+ try {
+ BasicAuth auth = BasicAuth.parseFrom(bytes);
+ byte[] token = authValidator.getToken(auth.getUsername(), auth.getPassword());
+ outgoing.send(token);
+ return true;
+ } catch (InvalidProtocolBufferException e) {
+ logger.debug("Failure parsing auth message.", e);
+ } catch (Exception e) {
+ logger.debug("Unknown error during authorization.", e);
+ }
+
+ return false;
+ }
+
+ @Override
+ public Optional<String> isValid(byte[] token) {
+ return authValidator.isValid(token);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthHandler.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthHandler.java
new file mode 100644
index 000000000..985e10aa4
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthHandler.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import java.util.Iterator;
+
+/**
+ * Implement authentication for Flight on the client side.
+ */
+public interface ClientAuthHandler {
+ /**
+ * Handle the initial handshake with the server.
+ * @param outgoing A channel to send data to the server.
+ * @param incoming An iterator of incoming data from the server.
+ */
+ void authenticate(ClientAuthSender outgoing, Iterator<byte[]> incoming);
+
+ /**
+ * Get the per-call authentication token.
+ */
+ byte[] getCallToken();
+
+ /**
+ * A communication channel to the server during initial connection.
+ */
+ interface ClientAuthSender {
+
+ /**
+ * Send the server a message.
+ */
+ void send(byte[] payload);
+
+ /**
+ * Signal an error to the server and abort the authentication attempt.
+ */
+ void onError(Throwable cause);
+
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthInterceptor.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthInterceptor.java
new file mode 100644
index 000000000..3d28b7ba7
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthInterceptor.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import io.grpc.CallOptions;
+import io.grpc.Channel;
+import io.grpc.ClientCall;
+import io.grpc.ClientInterceptor;
+import io.grpc.ForwardingClientCall.SimpleForwardingClientCall;
+import io.grpc.Metadata;
+import io.grpc.MethodDescriptor;
+
+/**
+ * GRPC client intercepter that handles authentication with the server.
+ */
+public class ClientAuthInterceptor implements ClientInterceptor {
+ private volatile ClientAuthHandler authHandler = null;
+
+ public void setAuthHandler(ClientAuthHandler authHandler) {
+ this.authHandler = authHandler;
+ }
+
+ public ClientAuthInterceptor() {
+ }
+
+ public boolean hasAuthHandler() {
+ return authHandler != null;
+ }
+
+ @Override
+ public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(MethodDescriptor<ReqT, RespT> methodDescriptor,
+ CallOptions callOptions, Channel next) {
+ ClientCall<ReqT, RespT> call = next.newCall(methodDescriptor, callOptions);
+
+ // once we have an auth header, add that to the calls.
+ if (authHandler != null) {
+ call = new HeaderAttachingClientCall<>(call);
+ }
+
+ return call;
+ }
+
+ private final class HeaderAttachingClientCall<ReqT, RespT> extends SimpleForwardingClientCall<ReqT, RespT> {
+
+ private HeaderAttachingClientCall(ClientCall<ReqT, RespT> call) {
+ super(call);
+ }
+
+ @Override
+ public void start(Listener<RespT> responseListener, Metadata headers) {
+ final Metadata authHeaders = new Metadata();
+ authHeaders.put(AuthConstants.TOKEN_KEY, authHandler.getCallToken());
+ headers.merge(authHeaders);
+ super.start(responseListener, headers);
+ }
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthWrapper.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthWrapper.java
new file mode 100644
index 000000000..e86dc163c
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthWrapper.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import java.util.Iterator;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import org.apache.arrow.flight.auth.ClientAuthHandler.ClientAuthSender;
+import org.apache.arrow.flight.grpc.StatusUtils;
+import org.apache.arrow.flight.impl.Flight.HandshakeRequest;
+import org.apache.arrow.flight.impl.Flight.HandshakeResponse;
+import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceStub;
+
+import com.google.protobuf.ByteString;
+
+import io.grpc.StatusRuntimeException;
+import io.grpc.stub.StreamObserver;
+
+/**
+ * Utility class for performing authorization over using a GRPC stub.
+ */
+public class ClientAuthWrapper {
+
+ /**
+ * Do client auth for a client. The stub will be authenticated after this method returns.
+ *
+ * @param authHandler The handler to use.
+ * @param stub The service stub.
+ */
+ public static void doClientAuth(ClientAuthHandler authHandler, FlightServiceStub stub) {
+ AuthObserver observer = new AuthObserver();
+ try {
+ observer.responseObserver = stub.handshake(observer);
+ authHandler.authenticate(observer.sender, observer.iter);
+ if (!observer.sender.errored) {
+ observer.responseObserver.onCompleted();
+ }
+ } catch (StatusRuntimeException sre) {
+ throw StatusUtils.fromGrpcRuntimeException(sre);
+ }
+ try {
+ if (!observer.completed.get()) {
+ // TODO: ARROW-5681
+ throw new RuntimeException("Unauthenticated");
+ }
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ } catch (ExecutionException e) {
+ throw StatusUtils.fromThrowable(e.getCause());
+ }
+ }
+
+ private static class AuthObserver implements StreamObserver<HandshakeResponse> {
+
+ private volatile StreamObserver<HandshakeRequest> responseObserver;
+ private final LinkedBlockingQueue<byte[]> messages = new LinkedBlockingQueue<>();
+ private final AuthSender sender = new AuthSender();
+ private CompletableFuture<Boolean> completed;
+
+ public AuthObserver() {
+ super();
+ completed = new CompletableFuture<>();
+ }
+
+ @Override
+ public void onNext(HandshakeResponse value) {
+ ByteString payload = value.getPayload();
+ if (payload != null) {
+ messages.add(payload.toByteArray());
+ }
+ }
+
+ private Iterator<byte[]> iter = new Iterator<byte[]>() {
+
+ @Override
+ public byte[] next() {
+ while (!completed.isDone() || !messages.isEmpty()) {
+ byte[] bytes = messages.poll();
+ if (bytes == null) {
+ // busy wait.
+ continue;
+ } else {
+ return bytes;
+ }
+ }
+
+ if (completed.isCompletedExceptionally()) {
+ // Preserve prior exception behavior
+ // TODO: with ARROW-5681, throw an appropriate Flight exception if gRPC raised an exception
+ try {
+ completed.get();
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ } catch (ExecutionException e) {
+ if (e.getCause() instanceof StatusRuntimeException) {
+ throw (StatusRuntimeException) e.getCause();
+ }
+ throw new RuntimeException(e);
+ }
+ }
+
+ throw new IllegalStateException("You attempted to retrieve messages after there were none.");
+ }
+
+ @Override
+ public boolean hasNext() {
+ return !messages.isEmpty();
+ }
+ };
+
+ @Override
+ public void onError(Throwable t) {
+ completed.completeExceptionally(t);
+ }
+
+ private class AuthSender implements ClientAuthSender {
+
+ private boolean errored = false;
+
+ @Override
+ public void send(byte[] payload) {
+ try {
+ responseObserver.onNext(HandshakeRequest.newBuilder()
+ .setPayload(ByteString.copyFrom(payload))
+ .build());
+ } catch (StatusRuntimeException sre) {
+ throw StatusUtils.fromGrpcRuntimeException(sre);
+ }
+ }
+
+ @Override
+ public void onError(Throwable cause) {
+ this.errored = true;
+ responseObserver.onError(StatusUtils.toGrpcException(cause));
+ }
+
+ }
+
+ @Override
+ public void onCompleted() {
+ completed.complete(true);
+ }
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java
new file mode 100644
index 000000000..3a978b131
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import java.util.Iterator;
+import java.util.Optional;
+
+/**
+ * Interface for Server side authentication handlers.
+ */
+public interface ServerAuthHandler {
+
+ /**
+ * Validate the client token provided on each call.
+ *
+ * @return An empty optional if the client is not authenticated; the peer identity otherwise (may be the empty
+ * string).
+ */
+ Optional<String> isValid(byte[] token);
+
+ /**
+ * Handle the initial handshake with the client.
+ *
+ * @param outgoing A writer to send messages to the client.
+ * @param incoming An iterator of messages from the client.
+ * @return true if client is authenticated, false otherwise.
+ */
+ boolean authenticate(ServerAuthSender outgoing, Iterator<byte[]> incoming);
+
+ /**
+ * Interface for a server implementations to send back authentication messages
+ * back to the client.
+ */
+ interface ServerAuthSender {
+
+ void send(byte[] payload);
+
+ void onError(Throwable cause);
+
+ }
+
+ /**
+ * An auth handler that does nothing.
+ */
+ ServerAuthHandler NO_OP = new ServerAuthHandler() {
+
+ @Override
+ public Optional<String> isValid(byte[] token) {
+ return Optional.of("");
+ }
+
+ @Override
+ public boolean authenticate(ServerAuthSender outgoing, Iterator<byte[]> incoming) {
+ return true;
+ }
+ };
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthInterceptor.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthInterceptor.java
new file mode 100644
index 000000000..5bff3784e
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthInterceptor.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import java.util.Optional;
+
+import org.apache.arrow.flight.FlightRuntimeException;
+import org.apache.arrow.flight.grpc.StatusUtils;
+
+import io.grpc.Context;
+import io.grpc.Contexts;
+import io.grpc.Metadata;
+import io.grpc.ServerCall;
+import io.grpc.ServerCall.Listener;
+import io.grpc.ServerCallHandler;
+import io.grpc.ServerInterceptor;
+import io.grpc.Status;
+import io.grpc.StatusRuntimeException;
+
+/**
+ * GRPC Interceptor for performing authentication.
+ */
+public class ServerAuthInterceptor implements ServerInterceptor {
+
+ private final ServerAuthHandler authHandler;
+
+ public ServerAuthInterceptor(ServerAuthHandler authHandler) {
+ this.authHandler = authHandler;
+ }
+
+ @Override
+ public <ReqT, RespT> Listener<ReqT> interceptCall(ServerCall<ReqT, RespT> call, Metadata headers,
+ ServerCallHandler<ReqT, RespT> next) {
+ if (!call.getMethodDescriptor().getFullMethodName().equals(AuthConstants.HANDSHAKE_DESCRIPTOR_NAME)) {
+ final Optional<String> peerIdentity;
+
+ // Allow customizing the response code by throwing FlightRuntimeException
+ try {
+ peerIdentity = isValid(headers);
+ } catch (FlightRuntimeException e) {
+ final Status grpcStatus = StatusUtils.toGrpcStatus(e.status());
+ call.close(grpcStatus, new Metadata());
+ return new NoopServerCallListener<>();
+ } catch (StatusRuntimeException e) {
+ Metadata trailers = e.getTrailers();
+ call.close(e.getStatus(), trailers == null ? new Metadata() : trailers);
+ return new NoopServerCallListener<>();
+ }
+
+ if (!peerIdentity.isPresent()) {
+ // Send back a description along with the status code
+ call.close(Status.UNAUTHENTICATED
+ .withDescription("Unauthenticated (invalid or missing auth token)"), new Metadata());
+ return new NoopServerCallListener<>();
+ }
+ return Contexts.interceptCall(Context.current().withValue(AuthConstants.PEER_IDENTITY_KEY, peerIdentity.get()),
+ call, headers, next);
+ }
+
+ return next.startCall(call, headers);
+ }
+
+ private Optional<String> isValid(Metadata headers) {
+ byte[] token = headers.get(AuthConstants.TOKEN_KEY);
+ return authHandler.isValid(token);
+ }
+
+ private static class NoopServerCallListener<T> extends ServerCall.Listener<T> {
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java
new file mode 100644
index 000000000..ad1a36a93
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import java.util.Iterator;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.auth.ServerAuthHandler.ServerAuthSender;
+import org.apache.arrow.flight.grpc.StatusUtils;
+import org.apache.arrow.flight.impl.Flight.HandshakeRequest;
+import org.apache.arrow.flight.impl.Flight.HandshakeResponse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.protobuf.ByteString;
+
+import io.grpc.stub.StreamObserver;
+
+/**
+ * Contains utility methods for integrating authorization into a GRPC stream.
+ */
+public class ServerAuthWrapper {
+ private static final Logger LOGGER = LoggerFactory.getLogger(ServerAuthWrapper.class);
+
+ /**
+ * Wrap the auth handler for handshake purposes.
+ *
+ * @param authHandler Authentication handler
+ * @param responseObserver Observer for handshake response
+ * @param executors ExecutorService
+ * @return AuthObserver
+ */
+ public static StreamObserver<HandshakeRequest> wrapHandshake(ServerAuthHandler authHandler,
+ StreamObserver<HandshakeResponse> responseObserver, ExecutorService executors) {
+
+ // stream started.
+ AuthObserver observer = new AuthObserver(responseObserver);
+ final Runnable r = () -> {
+ try {
+ if (authHandler.authenticate(observer.sender, observer.iter)) {
+ responseObserver.onCompleted();
+ return;
+ }
+
+ responseObserver.onError(StatusUtils.toGrpcException(CallStatus.UNAUTHENTICATED.toRuntimeException()));
+ } catch (Exception ex) {
+ LOGGER.error("Error during authentication", ex);
+ responseObserver.onError(StatusUtils.toGrpcException(ex));
+ }
+ };
+ observer.future = executors.submit(r);
+ return observer;
+ }
+
+ private static class AuthObserver implements StreamObserver<HandshakeRequest> {
+
+ private final StreamObserver<HandshakeResponse> responseObserver;
+ private volatile Future<?> future;
+ private volatile boolean completed = false;
+ private final LinkedBlockingQueue<byte[]> messages = new LinkedBlockingQueue<>();
+ private final AuthSender sender = new AuthSender();
+
+ public AuthObserver(StreamObserver<HandshakeResponse> responseObserver) {
+ super();
+ this.responseObserver = responseObserver;
+ }
+
+ @Override
+ public void onNext(HandshakeRequest value) {
+ ByteString payload = value.getPayload();
+ if (payload != null) {
+ messages.add(payload.toByteArray());
+ }
+ }
+
+ private Iterator<byte[]> iter = new Iterator<byte[]>() {
+
+ @Override
+ public byte[] next() {
+ while (!completed || !messages.isEmpty()) {
+ byte[] bytes = messages.poll();
+ if (bytes == null) {
+ //busy wait.
+ continue;
+ }
+ return bytes;
+ }
+ throw new IllegalStateException("Requesting more messages than client sent.");
+ }
+
+ @Override
+ public boolean hasNext() {
+ return !messages.isEmpty();
+ }
+ };
+
+ @Override
+ public void onError(Throwable t) {
+ completed = true;
+ while (future == null) {/* busy wait */}
+ future.cancel(true);
+ }
+
+ @Override
+ public void onCompleted() {
+ completed = true;
+ }
+
+ private class AuthSender implements ServerAuthSender {
+
+ @Override
+ public void send(byte[] payload) {
+ responseObserver.onNext(HandshakeResponse.newBuilder()
+ .setPayload(ByteString.copyFrom(payload))
+ .build());
+ }
+
+ @Override
+ public void onError(Throwable cause) {
+ responseObserver.onError(StatusUtils.toGrpcException(cause));
+ }
+
+ }
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/Auth2Constants.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/Auth2Constants.java
new file mode 100644
index 000000000..624d7d5ff
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/Auth2Constants.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+/**
+ * Constants used in authorization of flight connections.
+ */
+public final class Auth2Constants {
+ public static final String PEER_IDENTITY_KEY = "arrow-flight-peer-identity";
+ public static final String BEARER_PREFIX = "Bearer ";
+ public static final String BASIC_PREFIX = "Basic ";
+ public static final String AUTHORIZATION_HEADER = "Authorization";
+
+ private Auth2Constants() {
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/AuthUtilities.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/AuthUtilities.java
new file mode 100644
index 000000000..c73b7cf1a
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/AuthUtilities.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import org.apache.arrow.flight.CallHeaders;
+
+/**
+ * Utility class for completing the auth process.
+ */
+public final class AuthUtilities {
+ private AuthUtilities() {
+
+ }
+
+ /**
+ * Helper method for retrieving a value from the Authorization header.
+ *
+ * @param headers The headers to inspect.
+ * @param valuePrefix The prefix within the value portion of the header to extract away.
+ * @return The header value.
+ */
+ public static String getValueFromAuthHeader(CallHeaders headers, String valuePrefix) {
+ final String authHeaderValue = headers.get(Auth2Constants.AUTHORIZATION_HEADER);
+ if (authHeaderValue != null) {
+ if (authHeaderValue.regionMatches(true, 0, valuePrefix, 0, valuePrefix.length())) {
+ return authHeaderValue.substring(valuePrefix.length());
+ }
+ }
+ return null;
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicAuthCredentialWriter.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicAuthCredentialWriter.java
new file mode 100644
index 000000000..698287e88
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicAuthCredentialWriter.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.CallHeaders;
+
+/**
+ * Client credentials that use a username and password.
+ */
+public final class BasicAuthCredentialWriter implements Consumer<CallHeaders> {
+
+ private final String name;
+ private final String password;
+
+ public BasicAuthCredentialWriter(String name, String password) {
+ this.name = name;
+ this.password = password;
+ }
+
+ @Override
+ public void accept(CallHeaders outputHeaders) {
+ outputHeaders.insert(Auth2Constants.AUTHORIZATION_HEADER, Auth2Constants.BASIC_PREFIX +
+ Base64.getEncoder().encodeToString(String.format("%s:%s", name, password).getBytes(StandardCharsets.UTF_8)));
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicCallHeaderAuthenticator.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicCallHeaderAuthenticator.java
new file mode 100644
index 000000000..fff7b4690
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicCallHeaderAuthenticator.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightRuntimeException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A ServerAuthHandler for username/password authentication.
+ */
+public class BasicCallHeaderAuthenticator implements CallHeaderAuthenticator {
+
+ private static final Logger logger = LoggerFactory.getLogger(BasicCallHeaderAuthenticator.class);
+
+ private final CredentialValidator authValidator;
+
+ public BasicCallHeaderAuthenticator(CredentialValidator authValidator) {
+ this.authValidator = authValidator;
+ }
+
+ @Override
+ public AuthResult authenticate(CallHeaders incomingHeaders) {
+ try {
+ final String authEncoded = AuthUtilities.getValueFromAuthHeader(
+ incomingHeaders, Auth2Constants.BASIC_PREFIX);
+ if (authEncoded == null) {
+ throw CallStatus.UNAUTHENTICATED.toRuntimeException();
+ }
+ // The value has the format Base64(<username>:<password>)
+ final String authDecoded = new String(Base64.getDecoder().decode(authEncoded), StandardCharsets.UTF_8);
+ final int colonPos = authDecoded.indexOf(':');
+ if (colonPos == -1) {
+ throw CallStatus.UNAUTHENTICATED.toRuntimeException();
+ }
+
+ final String user = authDecoded.substring(0, colonPos);
+ final String password = authDecoded.substring(colonPos + 1);
+ return authValidator.validate(user, password);
+ } catch (UnsupportedEncodingException ex) {
+ // Note: Intentionally discarding the exception cause when reporting back to the client for security purposes.
+ logger.error("Authentication failed due to missing encoding.", ex);
+ throw CallStatus.INTERNAL.toRuntimeException();
+ } catch (FlightRuntimeException ex) {
+ throw ex;
+ } catch (Exception ex) {
+ // Note: Intentionally discarding the exception cause when reporting back to the client for security purposes.
+ logger.error("Authentication failed.", ex);
+ throw CallStatus.UNAUTHENTICATED.toRuntimeException();
+ }
+ }
+
+ /**
+ * Interface that this handler delegates to for validating the incoming headers.
+ */
+ public interface CredentialValidator {
+ /**
+ * Validate the supplied credentials (username/password) and return the peer identity.
+ *
+ * @param username The username to validate.
+ * @param password The password to validate.
+ * @return The peer identity if the supplied credentials are valid.
+ * @throws Exception If the supplied credentials are not valid.
+ */
+ AuthResult validate(String username, String password) throws Exception;
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerCredentialWriter.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerCredentialWriter.java
new file mode 100644
index 000000000..715ee502b
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerCredentialWriter.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.CallHeaders;
+
+/**
+ * Client credentials that use a bearer token.
+ */
+public final class BearerCredentialWriter implements Consumer<CallHeaders> {
+
+ private final String bearer;
+
+ public BearerCredentialWriter(String bearer) {
+ this.bearer = bearer;
+ }
+
+ @Override
+ public void accept(CallHeaders outputHeaders) {
+ outputHeaders.insert(Auth2Constants.AUTHORIZATION_HEADER, Auth2Constants.BEARER_PREFIX + bearer);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerTokenAuthenticator.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerTokenAuthenticator.java
new file mode 100644
index 000000000..2006e0a2b
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerTokenAuthenticator.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import org.apache.arrow.flight.CallHeaders;
+
+/**
+ * Partial implementation of {@link CallHeaderAuthenticator} for bearer-token based authentication.
+ */
+public abstract class BearerTokenAuthenticator implements CallHeaderAuthenticator {
+
+ final CallHeaderAuthenticator initialAuthenticator;
+
+ public BearerTokenAuthenticator(CallHeaderAuthenticator initialAuthenticator) {
+ this.initialAuthenticator = initialAuthenticator;
+ }
+
+ @Override
+ public AuthResult authenticate(CallHeaders incomingHeaders) {
+ // Check if headers contain a bearer token and if so, validate the token.
+ final String bearerToken =
+ AuthUtilities.getValueFromAuthHeader(incomingHeaders, Auth2Constants.BEARER_PREFIX);
+ if (bearerToken != null) {
+ return validateBearer(bearerToken);
+ }
+
+ // Delegate to the basic auth handler to do the validation.
+ final CallHeaderAuthenticator.AuthResult result = initialAuthenticator.authenticate(incomingHeaders);
+ return getAuthResultWithBearerToken(result);
+ }
+
+ /**
+ * Callback to run when the initial authenticator succeeds.
+ * @param authResult A successful initial authentication result.
+ * @return an alternate AuthResult based on the original AuthResult that will write a bearer token to output headers.
+ */
+ protected abstract AuthResult getAuthResultWithBearerToken(AuthResult authResult);
+
+ /**
+ * Validate the bearer token.
+ * @param bearerToken The bearer token to validate.
+ * @return A successful AuthResult if validation succeeded.
+ * @throws Exception If the token validation fails.
+ */
+ protected abstract AuthResult validateBearer(String bearerToken);
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/CallHeaderAuthenticator.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/CallHeaderAuthenticator.java
new file mode 100644
index 000000000..87e60f1fa
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/CallHeaderAuthenticator.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.FlightRuntimeException;
+
+/**
+ * Interface for Server side authentication handlers.
+ *
+ * A CallHeaderAuthenticator is used by {@link ServerCallHeaderAuthMiddleware} to validate headers sent by a Flight
+ * client for authentication purposes. The headers validated do not necessarily have to be Authorization headers.
+ *
+ * The workflow is that the FlightServer will intercept headers on a request, validate the headers, and
+ * either send back an UNAUTHENTICATED error, or succeed and potentially send back additional headers to the client.
+ *
+ * Implementations of CallHeaderAuthenticator should take care not to provide leak confidential details (such as
+ * indicating if usernames are valid or not) for security reasons when reporting errors back to clients.
+ *
+ * Example CallHeaderAuthenticators provided include:
+ * The {@link BasicCallHeaderAuthenticator} will authenticate basic HTTP credentials.
+ *
+ * The {@link BearerTokenAuthenticator} will authenticate basic HTTP credentials initially, then also send back a
+ * bearer token that the client can use for subsequent requests. The {@link GeneratedBearerTokenAuthenticator} will
+ * provide internally generated bearer tokens and maintain a cache of them.
+ */
+public interface CallHeaderAuthenticator {
+
+ /**
+ * Encapsulates the result of the {@link CallHeaderAuthenticator} analysis of headers.
+ *
+ * This includes the identity of the incoming user and any outbound headers to send as a response to the client.
+ */
+ interface AuthResult {
+ /**
+ * The peer identity that was determined by the handshake process based on the
+ * authentication credentials supplied by the client.
+ *
+ * @return The peer identity.
+ */
+ String getPeerIdentity();
+
+ /**
+ * Appends a header to the outgoing call headers.
+ * @param outgoingHeaders The outgoing headers.
+ */
+ default void appendToOutgoingHeaders(CallHeaders outgoingHeaders) {
+
+ }
+ }
+
+ /**
+ * Validate the auth headers sent by the client.
+ *
+ * @param incomingHeaders The incoming headers to authenticate.
+ * @return an auth result containing a peer identity and optionally a bearer token.
+ * @throws FlightRuntimeException with CallStatus.UNAUTHENTICATED if credentials were not supplied
+ * or if credentials were supplied but were not valid.
+ */
+ AuthResult authenticate(CallHeaders incomingHeaders);
+
+ /**
+ * An auth handler that does nothing.
+ */
+ CallHeaderAuthenticator NO_OP = new CallHeaderAuthenticator() {
+ @Override
+ public AuthResult authenticate(CallHeaders incomingHeaders) {
+ return () -> "";
+ }
+ };
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientBearerHeaderHandler.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientBearerHeaderHandler.java
new file mode 100644
index 000000000..45bdb6d95
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientBearerHeaderHandler.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.grpc.CredentialCallOption;
+
+/**
+ * A client header handler that parses the incoming headers for a bearer token.
+ */
+public class ClientBearerHeaderHandler implements ClientHeaderHandler {
+
+ @Override
+ public CredentialCallOption getCredentialCallOptionFromIncomingHeaders(CallHeaders incomingHeaders) {
+ final String bearerValue = AuthUtilities.getValueFromAuthHeader(incomingHeaders, Auth2Constants.BEARER_PREFIX);
+ if (bearerValue != null) {
+ return new CredentialCallOption(new BearerCredentialWriter(bearerValue));
+ }
+ return null;
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java
new file mode 100644
index 000000000..16a514250
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightRuntimeException;
+import org.apache.arrow.flight.grpc.StatusUtils;
+import org.apache.arrow.flight.impl.Flight.HandshakeRequest;
+import org.apache.arrow.flight.impl.Flight.HandshakeResponse;
+import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceStub;
+
+import io.grpc.StatusRuntimeException;
+import io.grpc.stub.StreamObserver;
+
+/**
+ * Utility class for executing a handshake with a FlightServer.
+ */
+public class ClientHandshakeWrapper {
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ClientHandshakeWrapper.class);
+
+ /**
+ * Do handshake for a client. The stub will be authenticated after this method returns.
+ *
+ * @param stub The service stub.
+ */
+ public static void doClientHandshake(FlightServiceStub stub) {
+ final HandshakeObserver observer = new HandshakeObserver();
+ try {
+ observer.requestObserver = stub.handshake(observer);
+ observer.requestObserver.onNext(HandshakeRequest.newBuilder().build());
+ observer.requestObserver.onCompleted();
+ try {
+ if (!observer.completed.get()) {
+ // TODO: ARROW-5681
+ throw CallStatus.UNAUTHENTICATED.toRuntimeException();
+ }
+ } catch (InterruptedException ex) {
+ Thread.currentThread().interrupt();
+ throw ex;
+ } catch (ExecutionException ex) {
+ final FlightRuntimeException wrappedException = StatusUtils.fromThrowable(ex.getCause());
+ logger.error("Failed on completing future", wrappedException);
+ throw wrappedException;
+ }
+ } catch (StatusRuntimeException sre) {
+ logger.error("Failed with SREe", sre);
+ throw StatusUtils.fromGrpcRuntimeException(sre);
+ } catch (Throwable ex) {
+ logger.error("Failed with unknown", ex);
+ if (ex instanceof FlightRuntimeException) {
+ throw (FlightRuntimeException) ex;
+ }
+ throw StatusUtils.fromThrowable(ex);
+ }
+ }
+
+ private static class HandshakeObserver implements StreamObserver<HandshakeResponse> {
+
+ private volatile StreamObserver<HandshakeRequest> requestObserver;
+ private final CompletableFuture<Boolean> completed;
+
+ public HandshakeObserver() {
+ super();
+ completed = new CompletableFuture<>();
+ }
+
+ @Override
+ public void onNext(HandshakeResponse value) {
+ }
+
+ @Override
+ public void onError(Throwable t) {
+ completed.completeExceptionally(t);
+ }
+
+ @Override
+ public void onCompleted() {
+ completed.complete(true);
+ }
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHeaderHandler.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHeaderHandler.java
new file mode 100644
index 000000000..514189f9b
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHeaderHandler.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.grpc.CredentialCallOption;
+
+/**
+ * Interface for client side header parsing and conversion to CredentialCallOption.
+ */
+public interface ClientHeaderHandler {
+ /**
+ * Parses the incoming headers and converts them into a CredentialCallOption.
+ * @param incomingHeaders Incoming headers to parse.
+ * @return An instance of CredentialCallOption.
+ */
+ CredentialCallOption getCredentialCallOptionFromIncomingHeaders(CallHeaders incomingHeaders);
+
+ /**
+ * An client header handler that does nothing.
+ */
+ ClientHeaderHandler NO_OP = new ClientHeaderHandler() {
+ @Override
+ public CredentialCallOption getCredentialCallOptionFromIncomingHeaders(CallHeaders incomingHeaders) {
+ return null;
+ }
+ };
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientIncomingAuthHeaderMiddleware.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientIncomingAuthHeaderMiddleware.java
new file mode 100644
index 000000000..be5f3f54d
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientIncomingAuthHeaderMiddleware.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.CallInfo;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightClientMiddleware;
+import org.apache.arrow.flight.grpc.CredentialCallOption;
+
+/**
+ * Middleware for capturing bearer tokens sent back from the Flight server.
+ */
+public class ClientIncomingAuthHeaderMiddleware implements FlightClientMiddleware {
+ private final Factory factory;
+
+ /**
+ * Factory used within FlightClient.
+ */
+ public static class Factory implements FlightClientMiddleware.Factory {
+ private final ClientHeaderHandler headerHandler;
+ private CredentialCallOption credentialCallOption;
+
+ /**
+ * Construct a factory with the given header handler.
+ * @param headerHandler The header handler that will be used for handling incoming headers from the flight server.
+ */
+ public Factory(ClientHeaderHandler headerHandler) {
+ this.headerHandler = headerHandler;
+ }
+
+ @Override
+ public FlightClientMiddleware onCallStarted(CallInfo info) {
+ return new ClientIncomingAuthHeaderMiddleware(this);
+ }
+
+ void setCredentialCallOption(CredentialCallOption callOption) {
+ this.credentialCallOption = callOption;
+ }
+
+ public CredentialCallOption getCredentialCallOption() {
+ return credentialCallOption;
+ }
+ }
+
+ private ClientIncomingAuthHeaderMiddleware(Factory factory) {
+ this.factory = factory;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ }
+
+ @Override
+ public void onHeadersReceived(CallHeaders incomingHeaders) {
+ factory.setCredentialCallOption(
+ factory.headerHandler.getCredentialCallOptionFromIncomingHeaders(incomingHeaders));
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/GeneratedBearerTokenAuthenticator.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/GeneratedBearerTokenAuthenticator.java
new file mode 100644
index 000000000..8b312b6b7
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/GeneratedBearerTokenAuthenticator.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import java.nio.ByteBuffer;
+import java.util.Base64;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.grpc.MetadataAdapter;
+
+import com.google.common.base.Strings;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
+import io.grpc.Metadata;
+
+/**
+ * Generates and caches bearer tokens from user credentials.
+ */
+public class GeneratedBearerTokenAuthenticator extends BearerTokenAuthenticator {
+ private final Cache<String, String> bearerToIdentityCache;
+
+ /**
+ * Generate bearer tokens for the given basic call authenticator.
+ * @param authenticator The authenticator to initial validate inputs with.
+ */
+ public GeneratedBearerTokenAuthenticator(CallHeaderAuthenticator authenticator) {
+ this(authenticator, CacheBuilder.newBuilder().expireAfterAccess(2, TimeUnit.HOURS));
+ }
+
+ /**
+ * Generate bearer tokens for the given basic call authenticator.
+ * @param authenticator The authenticator to initial validate inputs with.
+ * @param timeoutMinutes The time before tokens expire after being accessed.
+ */
+ public GeneratedBearerTokenAuthenticator(CallHeaderAuthenticator authenticator, int timeoutMinutes) {
+ this(authenticator, CacheBuilder.newBuilder().expireAfterAccess(timeoutMinutes, TimeUnit.MINUTES));
+ }
+
+ /**
+ * Generate bearer tokens for the given basic call authenticator.
+ * @param authenticator The authenticator to initial validate inputs with.
+ * @param cacheBuilder The configuration of the cache of bearer tokens.
+ */
+ public GeneratedBearerTokenAuthenticator(CallHeaderAuthenticator authenticator,
+ CacheBuilder<Object, Object> cacheBuilder) {
+ super(authenticator);
+ bearerToIdentityCache = cacheBuilder.build();
+ }
+
+ @Override
+ protected AuthResult validateBearer(String bearerToken) {
+ final String peerIdentity = bearerToIdentityCache.getIfPresent(bearerToken);
+ if (peerIdentity == null) {
+ throw CallStatus.UNAUTHENTICATED.toRuntimeException();
+ }
+
+ return new AuthResult() {
+ @Override
+ public String getPeerIdentity() {
+ return peerIdentity;
+ }
+
+ @Override
+ public void appendToOutgoingHeaders(CallHeaders outgoingHeaders) {
+ if (null == AuthUtilities.getValueFromAuthHeader(outgoingHeaders, Auth2Constants.BEARER_PREFIX)) {
+ outgoingHeaders.insert(Auth2Constants.AUTHORIZATION_HEADER, Auth2Constants.BEARER_PREFIX + bearerToken);
+ }
+ }
+ };
+ }
+
+ @Override
+ protected AuthResult getAuthResultWithBearerToken(AuthResult authResult) {
+ // We generate a dummy header and call appendToOutgoingHeaders with it.
+ // We then inspect the dummy header and parse the bearer token if present in the header
+ // and generate a new bearer token if a bearer token is not present in the header.
+ final CallHeaders dummyHeaders = new MetadataAdapter(new Metadata());
+ authResult.appendToOutgoingHeaders(dummyHeaders);
+ String bearerToken =
+ AuthUtilities.getValueFromAuthHeader(dummyHeaders, Auth2Constants.BEARER_PREFIX);
+ final AuthResult authResultWithBearerToken;
+ if (Strings.isNullOrEmpty(bearerToken)) {
+ // Generate a new bearer token and return an AuthResult that can write it.
+ final UUID uuid = UUID.randomUUID();
+ final ByteBuffer byteBuffer = ByteBuffer.wrap(new byte[16]);
+ byteBuffer.putLong(uuid.getMostSignificantBits());
+ byteBuffer.putLong(uuid.getLeastSignificantBits());
+ final String newToken = Base64.getEncoder().encodeToString(byteBuffer.array());
+ bearerToken = newToken;
+ authResultWithBearerToken = new AuthResult() {
+ @Override
+ public String getPeerIdentity() {
+ return authResult.getPeerIdentity();
+ }
+
+ @Override
+ public void appendToOutgoingHeaders(CallHeaders outgoingHeaders) {
+ authResult.appendToOutgoingHeaders(outgoingHeaders);
+ outgoingHeaders.insert(Auth2Constants.AUTHORIZATION_HEADER, Auth2Constants.BEARER_PREFIX + newToken);
+ }
+ };
+ } else {
+ // Use the bearer token supplied by the original auth result.
+ authResultWithBearerToken = authResult;
+ }
+ bearerToIdentityCache.put(bearerToken, authResult.getPeerIdentity());
+ return authResultWithBearerToken;
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ServerCallHeaderAuthMiddleware.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ServerCallHeaderAuthMiddleware.java
new file mode 100644
index 000000000..9bfa73818
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ServerCallHeaderAuthMiddleware.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import static org.apache.arrow.flight.auth2.CallHeaderAuthenticator.AuthResult;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.CallInfo;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightServerMiddleware;
+import org.apache.arrow.flight.RequestContext;
+
+/**
+ * Middleware that's used to validate credentials during the handshake and verify
+ * the bearer token in subsequent requests.
+ */
+public class ServerCallHeaderAuthMiddleware implements FlightServerMiddleware {
+ /**
+ * Factory for accessing ServerAuthMiddleware.
+ */
+ public static class Factory implements FlightServerMiddleware.Factory<ServerCallHeaderAuthMiddleware> {
+ private final CallHeaderAuthenticator authHandler;
+
+ /**
+ * Construct a factory with the given auth handler.
+ * @param authHandler The auth handler what will be used for authenticating requests.
+ */
+ public Factory(CallHeaderAuthenticator authHandler) {
+ this.authHandler = authHandler;
+ }
+
+ @Override
+ public ServerCallHeaderAuthMiddleware onCallStarted(CallInfo callInfo, CallHeaders incomingHeaders,
+ RequestContext context) {
+ final AuthResult result = authHandler.authenticate(incomingHeaders);
+ context.put(Auth2Constants.PEER_IDENTITY_KEY, result.getPeerIdentity());
+ return new ServerCallHeaderAuthMiddleware(result);
+ }
+ }
+
+ private final AuthResult authResult;
+
+ public ServerCallHeaderAuthMiddleware(AuthResult authResult) {
+ this.authResult = authResult;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ authResult.appendToOutgoingHeaders(outgoingHeaders);
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+ }
+
+ @Override
+ public void onCallErrored(Throwable err) {
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/client/ClientCookieMiddleware.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/client/ClientCookieMiddleware.java
new file mode 100644
index 000000000..56f24e101
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/client/ClientCookieMiddleware.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.client;
+
+import java.net.HttpCookie;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.CallInfo;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightClientMiddleware;
+import org.apache.arrow.util.VisibleForTesting;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A client middleware for receiving and sending cookie information.
+ * Note that this class will not persist permanent cookies beyond the lifetime
+ * of this session.
+ *
+ * This middleware will automatically remove cookies that have expired.
+ * <b>Note</b>: Negative max-age values currently do not get marked as expired due to
+ * a JDK issue. Use max-age=0 to explicitly remove an existing cookie.
+ */
+public class ClientCookieMiddleware implements FlightClientMiddleware {
+ private static final Logger LOGGER = LoggerFactory.getLogger(ClientCookieMiddleware.class);
+
+ private static final String SET_COOKIE_HEADER = "Set-Cookie";
+ private static final String COOKIE_HEADER = "Cookie";
+
+ private final Factory factory;
+
+ @VisibleForTesting
+ ClientCookieMiddleware(Factory factory) {
+ this.factory = factory;
+ }
+
+ /**
+ * Factory used within FlightClient.
+ */
+ public static class Factory implements FlightClientMiddleware.Factory {
+ // Use a map to track the most recent version of a cookie from the server.
+ // Note that cookie names are case-sensitive (but header names aren't).
+ private ConcurrentMap<String, HttpCookie> cookies = new ConcurrentHashMap<>();
+
+ @Override
+ public ClientCookieMiddleware onCallStarted(CallInfo info) {
+ return new ClientCookieMiddleware(this);
+ }
+
+ private void updateCookies(Iterable<String> newCookieHeaderValues) {
+ // Note: Intentionally overwrite existing cookie values.
+ // A cookie defined once will continue to be used in all subsequent
+ // requests on the client instance. The server can send the same cookie again
+ // with a different value and the client will use the new value in future requests.
+ // The server can also update a cookie to have an Expiry in the past or negative age
+ // to signal that the client should stop using the cookie immediately.
+ newCookieHeaderValues.forEach(headerValue -> {
+ try {
+ final List<HttpCookie> parsedCookies = HttpCookie.parse(headerValue);
+ parsedCookies.forEach(parsedCookie -> {
+ final String cookieNameLc = parsedCookie.getName().toLowerCase(Locale.ENGLISH);
+ if (parsedCookie.hasExpired()) {
+ cookies.remove(cookieNameLc);
+ } else {
+ cookies.put(parsedCookie.getName().toLowerCase(Locale.ENGLISH), parsedCookie);
+ }
+ });
+ } catch (IllegalArgumentException ex) {
+ LOGGER.warn("Skipping incorrectly formatted Set-Cookie header with value '{}'.", headerValue);
+ }
+ });
+ }
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ final String cookieValue = getValidCookiesAsString();
+ if (!cookieValue.isEmpty()) {
+ outgoingHeaders.insert(COOKIE_HEADER, cookieValue);
+ }
+ }
+
+ @Override
+ public void onHeadersReceived(CallHeaders incomingHeaders) {
+ final Iterable<String> setCookieHeaders = incomingHeaders.getAll(SET_COOKIE_HEADER);
+ if (setCookieHeaders != null) {
+ factory.updateCookies(setCookieHeaders);
+ }
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+
+ }
+
+ /**
+ * Discards expired cookies and returns the valid cookies as a String delimited by ';'.
+ */
+ @VisibleForTesting
+ String getValidCookiesAsString() {
+ // Discard expired cookies.
+ factory.cookies.entrySet().removeIf(cookieEntry -> cookieEntry.getValue().hasExpired());
+
+ // Cookie header value format:
+ // [<cookie-name1>=<cookie-value1>; <cookie-name2>=<cookie-value2; ...]
+ return factory.cookies.entrySet().stream()
+ .map(cookie -> cookie.getValue().toString())
+ .collect(Collectors.joining("; "));
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/ExampleFlightServer.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/ExampleFlightServer.java
new file mode 100644
index 000000000..528c227df
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/ExampleFlightServer.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example;
+
+import java.io.IOException;
+
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+
+/**
+ * An Example Flight Server that provides access to the InMemoryStore. Used for integration testing.
+ */
+public class ExampleFlightServer implements AutoCloseable {
+
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ExampleFlightServer.class);
+
+ private final FlightServer flightServer;
+ private final Location location;
+ private final BufferAllocator allocator;
+ private final InMemoryStore mem;
+
+ /**
+ * Constructs a new instance using Allocator for allocating buffer storage that binds
+ * to the given location.
+ */
+ public ExampleFlightServer(BufferAllocator allocator, Location location) {
+ this.allocator = allocator.newChildAllocator("flight-server", 0, Long.MAX_VALUE);
+ this.location = location;
+ this.mem = new InMemoryStore(this.allocator, location);
+ this.flightServer = FlightServer.builder(allocator, location, mem).build();
+ }
+
+ public Location getLocation() {
+ return location;
+ }
+
+ public int getPort() {
+ return this.flightServer.getPort();
+ }
+
+ public void start() throws IOException {
+ flightServer.start();
+ }
+
+ public void awaitTermination() throws InterruptedException {
+ flightServer.awaitTermination();
+ }
+
+ public InMemoryStore getStore() {
+ return mem;
+ }
+
+ @Override
+ public void close() throws Exception {
+ AutoCloseables.close(mem, flightServer, allocator);
+ }
+
+ /**
+ * Main method starts the server listening to localhost:12233.
+ */
+ public static void main(String[] args) throws Exception {
+ final BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ final ExampleFlightServer efs = new ExampleFlightServer(a, Location.forGrpcInsecure("localhost", 12233));
+ efs.start();
+ Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+ try {
+ System.out.println("\nExiting...");
+ AutoCloseables.close(efs, a);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }));
+ efs.awaitTermination();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/ExampleTicket.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/ExampleTicket.java
new file mode 100644
index 000000000..e15ecd034
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/ExampleTicket.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.arrow.flight.Ticket;
+import org.apache.arrow.util.Preconditions;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+import com.google.common.base.Throwables;
+
+/**
+ * POJO object used to demonstrate how an opaque ticket can be generated.
+ */
+@JsonSerialize
+public class ExampleTicket {
+
+ private static final ObjectMapper MAPPER = new ObjectMapper();
+
+ private final List<String> path;
+ private final int ordinal;
+
+ // uuid to ensure that a stream from one node is not recreated on another node and mixed up.
+ private final String uuid;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param path Path to data
+ * @param ordinal A counter for the stream.
+ * @param uuid A unique identifier for this particular stream.
+ */
+ @JsonCreator
+ public ExampleTicket(@JsonProperty("path") List<String> path, @JsonProperty("ordinal") int ordinal,
+ @JsonProperty("uuid") String uuid) {
+ super();
+ Preconditions.checkArgument(ordinal >= 0);
+ this.path = path;
+ this.ordinal = ordinal;
+ this.uuid = uuid;
+ }
+
+ public List<String> getPath() {
+ return path;
+ }
+
+ public int getOrdinal() {
+ return ordinal;
+ }
+
+ public String getUuid() {
+ return uuid;
+ }
+
+ /**
+ * Deserializes a new instance from the protocol buffer ticket.
+ */
+ public static ExampleTicket from(Ticket ticket) {
+ try {
+ return MAPPER.readValue(ticket.getBytes(), ExampleTicket.class);
+ } catch (IOException e) {
+ throw Throwables.propagate(e);
+ }
+ }
+
+ /**
+ * Creates a new protocol buffer Ticket by serializing to JSON.
+ */
+ public Ticket toTicket() {
+ try {
+ return new Ticket(MAPPER.writeValueAsBytes(this));
+ } catch (JsonProcessingException e) {
+ throw Throwables.propagate(e);
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ordinal;
+ result = prime * result + ((path == null) ? 0 : path.hashCode());
+ result = prime * result + ((uuid == null) ? 0 : uuid.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ ExampleTicket other = (ExampleTicket) obj;
+ if (ordinal != other.ordinal) {
+ return false;
+ }
+ if (path == null) {
+ if (other.path != null) {
+ return false;
+ }
+ } else if (!path.equals(other.path)) {
+ return false;
+ }
+ if (uuid == null) {
+ if (other.uuid != null) {
+ return false;
+ }
+ } else if (!uuid.equals(other.uuid)) {
+ return false;
+ }
+ return true;
+ }
+
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/FlightHolder.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/FlightHolder.java
new file mode 100644
index 000000000..f6295211e
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/FlightHolder.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flight.FlightDescriptor;
+import org.apache.arrow.flight.FlightEndpoint;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DictionaryUtility;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+
+/**
+ * A logical collection of streams sharing the same schema.
+ */
+public class FlightHolder implements AutoCloseable {
+
+ private final BufferAllocator allocator;
+ private final FlightDescriptor descriptor;
+ private final Schema schema;
+ private final List<Stream> streams = new CopyOnWriteArrayList<>();
+ private final DictionaryProvider dictionaryProvider;
+
+ /**
+ * Creates a new instance.
+ * @param allocator The allocator to use for allocating buffers to store data.
+ * @param descriptor The descriptor for the streams.
+ * @param schema The schema for the stream.
+ * @param dictionaryProvider The dictionary provider for the stream.
+ */
+ public FlightHolder(BufferAllocator allocator, FlightDescriptor descriptor, Schema schema,
+ DictionaryProvider dictionaryProvider) {
+ Preconditions.checkArgument(!descriptor.isCommand());
+ this.allocator = allocator.newChildAllocator(descriptor.toString(), 0, Long.MAX_VALUE);
+ this.descriptor = descriptor;
+ this.schema = schema;
+ this.dictionaryProvider = dictionaryProvider;
+ }
+
+ /**
+ * Returns the stream based on the ordinal of ExampleTicket.
+ */
+ public Stream getStream(ExampleTicket ticket) {
+ Preconditions.checkArgument(ticket.getOrdinal() < streams.size(), "Unknown stream.");
+ Stream stream = streams.get(ticket.getOrdinal());
+ stream.verify(ticket);
+ return stream;
+ }
+
+ /**
+ * Adds a new streams which clients can populate via the returned object.
+ */
+ public Stream.StreamCreator addStream(Schema schema) {
+ Preconditions.checkArgument(this.schema.equals(schema), "Stream schema inconsistent with existing schema.");
+ return new Stream.StreamCreator(schema, dictionaryProvider, allocator, t -> {
+ synchronized (streams) {
+ streams.add(t);
+ }
+ });
+ }
+
+ /**
+ * List all available streams as being available at <code>l</code>.
+ */
+ public FlightInfo getFlightInfo(final Location l) {
+ final long bytes = allocator.getAllocatedMemory();
+ final long records = streams.stream().collect(Collectors.summingLong(t -> t.getRecordCount()));
+
+ final List<FlightEndpoint> endpoints = new ArrayList<>();
+ int i = 0;
+ for (Stream s : streams) {
+ endpoints.add(
+ new FlightEndpoint(
+ new ExampleTicket(descriptor.getPath(), i, s.getUuid())
+ .toTicket(),
+ l));
+ i++;
+ }
+ return new FlightInfo(messageFormatSchema(), descriptor, endpoints, bytes, records);
+ }
+
+ private Schema messageFormatSchema() {
+ Set<Long> dictionaryIdsUsed = new HashSet<>();
+ List<Field> messageFormatFields = schema.getFields()
+ .stream()
+ .map(f -> DictionaryUtility.toMessageFormat(f, dictionaryProvider, dictionaryIdsUsed))
+ .collect(Collectors.toList());
+ return new Schema(messageFormatFields, schema.getCustomMetadata());
+ }
+
+ @Override
+ public void close() throws Exception {
+ // Close dictionaries
+ final Set<Long> dictionaryIds = new HashSet<>();
+ schema.getFields().forEach(field -> DictionaryUtility.toMessageFormat(field, dictionaryProvider, dictionaryIds));
+
+ final Iterable<AutoCloseable> dictionaries = dictionaryIds.stream()
+ .map(id -> (AutoCloseable) dictionaryProvider.lookup(id).getVector())::iterator;
+
+ AutoCloseables.close(Iterables.concat(streams, ImmutableList.of(allocator), dictionaries));
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/InMemoryStore.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/InMemoryStore.java
new file mode 100644
index 000000000..ff796718d
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/InMemoryStore.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example;
+
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import org.apache.arrow.flight.Action;
+import org.apache.arrow.flight.ActionType;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.Criteria;
+import org.apache.arrow.flight.FlightDescriptor;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightProducer;
+import org.apache.arrow.flight.FlightStream;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.flight.PutResult;
+import org.apache.arrow.flight.Result;
+import org.apache.arrow.flight.Ticket;
+import org.apache.arrow.flight.example.Stream.StreamCreator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+
+/**
+ * A FlightProducer that hosts an in memory store of Arrow buffers. Used for integration testing.
+ */
+public class InMemoryStore implements FlightProducer, AutoCloseable {
+
+ private final ConcurrentMap<FlightDescriptor, FlightHolder> holders = new ConcurrentHashMap<>();
+ private final BufferAllocator allocator;
+ private Location location;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param allocator The allocator for creating new Arrow buffers.
+ * @param location The location of the storage.
+ */
+ public InMemoryStore(BufferAllocator allocator, Location location) {
+ super();
+ this.allocator = allocator;
+ this.location = location;
+ }
+
+ /**
+ * Update the location after server start.
+ *
+ * <p>Useful for binding to port 0 to get a free port.
+ */
+ public void setLocation(Location location) {
+ this.location = location;
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket,
+ ServerStreamListener listener) {
+ getStream(ticket).sendTo(allocator, listener);
+ }
+
+ /**
+ * Returns the appropriate stream given the ticket (streams are indexed by path and an ordinal).
+ */
+ public Stream getStream(Ticket t) {
+ ExampleTicket example = ExampleTicket.from(t);
+ FlightDescriptor d = FlightDescriptor.path(example.getPath());
+ FlightHolder h = holders.get(d);
+ if (h == null) {
+ throw new IllegalStateException("Unknown ticket.");
+ }
+
+ return h.getStream(example);
+ }
+
+ @Override
+ public void listFlights(CallContext context, Criteria criteria, StreamListener<FlightInfo> listener) {
+ try {
+ for (FlightHolder h : holders.values()) {
+ listener.onNext(h.getFlightInfo(location));
+ }
+ listener.onCompleted();
+ } catch (Exception ex) {
+ listener.onError(ex);
+ }
+ }
+
+ @Override
+ public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) {
+ FlightHolder h = holders.get(descriptor);
+ if (h == null) {
+ throw new IllegalStateException("Unknown descriptor.");
+ }
+
+ return h.getFlightInfo(location);
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context,
+ final FlightStream flightStream, final StreamListener<PutResult> ackStream) {
+ return () -> {
+ StreamCreator creator = null;
+ boolean success = false;
+ try (VectorSchemaRoot root = flightStream.getRoot()) {
+ final FlightHolder h = holders.computeIfAbsent(
+ flightStream.getDescriptor(),
+ t -> new FlightHolder(allocator, t, flightStream.getSchema(), flightStream.getDictionaryProvider()));
+
+ creator = h.addStream(flightStream.getSchema());
+
+ VectorUnloader unloader = new VectorUnloader(root);
+ while (flightStream.next()) {
+ ackStream.onNext(PutResult.metadata(flightStream.getLatestMetadata()));
+ creator.add(unloader.getRecordBatch());
+ }
+ // Closing the stream will release the dictionaries
+ flightStream.takeDictionaryOwnership();
+ creator.complete();
+ success = true;
+ } finally {
+ if (!success) {
+ creator.drop();
+ }
+ }
+
+ };
+
+ }
+
+ @Override
+ public void doAction(CallContext context, Action action,
+ StreamListener<Result> listener) {
+ switch (action.getType()) {
+ case "drop": {
+ // not implemented.
+ listener.onNext(new Result(new byte[0]));
+ listener.onCompleted();
+ break;
+ }
+ default: {
+ listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException());
+ }
+ }
+ }
+
+ @Override
+ public void listActions(CallContext context,
+ StreamListener<ActionType> listener) {
+ listener.onNext(new ActionType("get", "pull a stream. Action must be done via standard get mechanism"));
+ listener.onNext(new ActionType("put", "push a stream. Action must be done via standard put mechanism"));
+ listener.onNext(new ActionType("drop", "delete a flight. Action body is a JSON encoded path."));
+ listener.onCompleted();
+ }
+
+ @Override
+ public void close() throws Exception {
+ AutoCloseables.close(holders.values());
+ holders.clear();
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/Stream.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/Stream.java
new file mode 100644
index 000000000..0bc35798d
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/Stream.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.UUID;
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.FlightProducer.ServerStreamListener;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.google.common.base.Throwables;
+import com.google.common.collect.ImmutableList;
+
+/**
+ * A collection of Arrow record batches.
+ */
+public class Stream implements AutoCloseable, Iterable<ArrowRecordBatch> {
+
+ private final String uuid = UUID.randomUUID().toString();
+ private final DictionaryProvider dictionaryProvider;
+ private final List<ArrowRecordBatch> batches;
+ private final Schema schema;
+ private final long recordCount;
+
+ /**
+ * Create a new instance.
+ *
+ * @param schema The schema for the record batches.
+ * @param batches The data associated with the stream.
+ * @param recordCount The total record count across all batches.
+ */
+ public Stream(
+ final Schema schema,
+ final DictionaryProvider dictionaryProvider,
+ List<ArrowRecordBatch> batches,
+ long recordCount) {
+ this.schema = schema;
+ this.dictionaryProvider = dictionaryProvider;
+ this.batches = ImmutableList.copyOf(batches);
+ this.recordCount = recordCount;
+ }
+
+ public Schema getSchema() {
+ return schema;
+ }
+
+ @Override
+ public Iterator<ArrowRecordBatch> iterator() {
+ return batches.iterator();
+ }
+
+ public long getRecordCount() {
+ return recordCount;
+ }
+
+ public String getUuid() {
+ return uuid;
+ }
+
+ /**
+ * Sends that data from this object to the given listener.
+ */
+ public void sendTo(BufferAllocator allocator, ServerStreamListener listener) {
+ try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ listener.start(root, dictionaryProvider);
+ final VectorLoader loader = new VectorLoader(root);
+ int counter = 0;
+ for (ArrowRecordBatch batch : batches) {
+ final byte[] rawMetadata = Integer.toString(counter).getBytes(StandardCharsets.UTF_8);
+ final ArrowBuf metadata = allocator.buffer(rawMetadata.length);
+ metadata.writeBytes(rawMetadata);
+ loader.load(batch);
+ // Transfers ownership of the buffer - do not free buffer ourselves
+ listener.putNext(metadata);
+ counter++;
+ }
+ listener.completed();
+ } catch (Exception ex) {
+ listener.error(ex);
+ }
+ }
+
+ /**
+ * Throws an IllegalStateException if the given ticket doesn't correspond to this stream.
+ */
+ public void verify(ExampleTicket ticket) {
+ if (!uuid.equals(ticket.getUuid())) {
+ throw new IllegalStateException("Ticket doesn't match.");
+ }
+ }
+
+ @Override
+ public void close() throws Exception {
+ AutoCloseables.close(batches);
+ }
+
+ /**
+ * Provides the functionality to create a new stream by adding batches serially.
+ */
+ public static class StreamCreator {
+
+ private final Schema schema;
+ private final BufferAllocator allocator;
+ private final List<ArrowRecordBatch> batches = new ArrayList<>();
+ private final Consumer<Stream> committer;
+ private long recordCount = 0;
+ private DictionaryProvider dictionaryProvider;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param schema The schema for batches in the stream.
+ * @param dictionaryProvider The dictionary provider for the stream.
+ * @param allocator The allocator used to copy data permanently into the stream.
+ * @param committer A callback for when the stream is ready to be finalized (no more batches).
+ */
+ public StreamCreator(Schema schema, DictionaryProvider dictionaryProvider,
+ BufferAllocator allocator, Consumer<Stream> committer) {
+ this.allocator = allocator;
+ this.committer = committer;
+ this.schema = schema;
+ this.dictionaryProvider = dictionaryProvider;
+ }
+
+ /**
+ * Abandon creation of the stream.
+ */
+ public void drop() {
+ try {
+ AutoCloseables.close(batches);
+ } catch (Exception ex) {
+ throw Throwables.propagate(ex);
+ }
+ }
+
+ public void add(ArrowRecordBatch batch) {
+ batches.add(batch.cloneWithTransfer(allocator));
+ recordCount += batch.getLength();
+ }
+
+ /**
+ * Complete building the stream (no more batches can be added).
+ */
+ public void complete() {
+ Stream stream = new Stream(schema, dictionaryProvider, batches, recordCount);
+ committer.accept(stream);
+ }
+
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/AuthBasicProtoScenario.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/AuthBasicProtoScenario.java
new file mode 100644
index 000000000..3955d7d21
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/AuthBasicProtoScenario.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example.integration;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Optional;
+
+import org.apache.arrow.flight.Action;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightProducer;
+import org.apache.arrow.flight.FlightRuntimeException;
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.FlightStatusCode;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.flight.NoOpFlightProducer;
+import org.apache.arrow.flight.Result;
+import org.apache.arrow.flight.auth.BasicClientAuthHandler;
+import org.apache.arrow.flight.auth.BasicServerAuthHandler;
+import org.apache.arrow.memory.BufferAllocator;
+
+/**
+ * A scenario testing the built-in basic authentication Protobuf.
+ */
+final class AuthBasicProtoScenario implements Scenario {
+
+ static final String USERNAME = "arrow";
+ static final String PASSWORD = "flight";
+
+ @Override
+ public FlightProducer producer(BufferAllocator allocator, Location location) {
+ return new NoOpFlightProducer() {
+ @Override
+ public void doAction(CallContext context, Action action, StreamListener<Result> listener) {
+ listener.onNext(new Result(context.peerIdentity().getBytes(StandardCharsets.UTF_8)));
+ listener.onCompleted();
+ }
+ };
+ }
+
+ @Override
+ public void buildServer(FlightServer.Builder builder) {
+ builder.authHandler(new BasicServerAuthHandler(new BasicServerAuthHandler.BasicAuthValidator() {
+ @Override
+ public byte[] getToken(String username, String password) throws Exception {
+ if (!USERNAME.equals(username) || !PASSWORD.equals(password)) {
+ throw CallStatus.UNAUTHENTICATED.withDescription("Username or password is invalid.").toRuntimeException();
+ }
+ return ("valid:" + username).getBytes(StandardCharsets.UTF_8);
+ }
+
+ @Override
+ public Optional<String> isValid(byte[] token) {
+ if (token != null) {
+ final String credential = new String(token, StandardCharsets.UTF_8);
+ if (credential.startsWith("valid:")) {
+ return Optional.of(credential.substring(6));
+ }
+ }
+ return Optional.empty();
+ }
+ }));
+ }
+
+ @Override
+ public void client(BufferAllocator allocator, Location location, FlightClient client) {
+ final FlightRuntimeException e = IntegrationAssertions.assertThrows(FlightRuntimeException.class, () -> {
+ client.listActions().forEach(act -> {
+ });
+ });
+ if (!FlightStatusCode.UNAUTHENTICATED.equals(e.status().code())) {
+ throw new AssertionError("Expected UNAUTHENTICATED but found " + e.status().code(), e);
+ }
+
+ client.authenticate(new BasicClientAuthHandler(USERNAME, PASSWORD));
+ final Result result = client.doAction(new Action("")).next();
+ if (!USERNAME.equals(new String(result.getBody(), StandardCharsets.UTF_8))) {
+ throw new AssertionError("Expected " + USERNAME + " but got " + Arrays.toString(result.getBody()));
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationAssertions.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationAssertions.java
new file mode 100644
index 000000000..576d1887f
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationAssertions.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example.integration;
+
+import java.util.Objects;
+
+/**
+ * Utility methods to implement integration tests without using JUnit assertions.
+ */
+final class IntegrationAssertions {
+
+ /**
+ * Assert that the given code throws the given exception or subclass thereof.
+ *
+ * @param clazz The exception type.
+ * @param body The code to run.
+ * @param <T> The exception type.
+ * @return The thrown exception.
+ */
+ @SuppressWarnings("unchecked")
+ static <T extends Throwable> T assertThrows(Class<T> clazz, AssertThrows body) {
+ try {
+ body.run();
+ } catch (Throwable t) {
+ if (clazz.isInstance(t)) {
+ return (T) t;
+ }
+ throw new AssertionError("Expected exception of class " + clazz + " but got " + t.getClass(), t);
+ }
+ throw new AssertionError("Expected exception of class " + clazz + " but did not throw.");
+ }
+
+ /**
+ * Assert that the two (non-array) objects are equal.
+ */
+ static void assertEquals(Object expected, Object actual) {
+ if (!Objects.equals(expected, actual)) {
+ throw new AssertionError("Expected:\n" + expected + "\nbut got:\n" + actual);
+ }
+ }
+
+ /**
+ * Assert that the value is false, using the given message as an error otherwise.
+ */
+ static void assertFalse(String message, boolean value) {
+ if (value) {
+ throw new AssertionError("Expected false: " + message);
+ }
+ }
+
+ /**
+ * An interface used with {@link #assertThrows(Class, AssertThrows)}.
+ */
+ @FunctionalInterface
+ interface AssertThrows {
+
+ void run() throws Throwable;
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestClient.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestClient.java
new file mode 100644
index 000000000..27a545f84
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestClient.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example.integration;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+import org.apache.arrow.flight.AsyncPutListener;
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightDescriptor;
+import org.apache.arrow.flight.FlightEndpoint;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightStream;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.flight.PutResult;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.ipc.JsonFileReader;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.Validator;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+
+/**
+ * A Flight client for integration testing.
+ */
+class IntegrationTestClient {
+ private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(IntegrationTestClient.class);
+ private final Options options;
+
+ private IntegrationTestClient() {
+ options = new Options();
+ options.addOption("j", "json", true, "json file");
+ options.addOption("scenario", true, "The integration test scenario.");
+ options.addOption("host", true, "The host to connect to.");
+ options.addOption("port", true, "The port to connect to.");
+ }
+
+ public static void main(String[] args) {
+ try {
+ new IntegrationTestClient().run(args);
+ } catch (ParseException e) {
+ fatalError("Invalid parameters", e);
+ } catch (IOException e) {
+ fatalError("Error accessing files", e);
+ } catch (Exception e) {
+ fatalError("Unknown error", e);
+ }
+ }
+
+ private static void fatalError(String message, Throwable e) {
+ System.err.println(message);
+ System.err.println(e.getMessage());
+ LOGGER.error(message, e);
+ System.exit(1);
+ }
+
+ private void run(String[] args) throws Exception {
+ final CommandLineParser parser = new DefaultParser();
+ final CommandLine cmd = parser.parse(options, args, false);
+
+ final String host = cmd.getOptionValue("host", "localhost");
+ final int port = Integer.parseInt(cmd.getOptionValue("port", "31337"));
+
+ final Location defaultLocation = Location.forGrpcInsecure(host, port);
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final FlightClient client = FlightClient.builder(allocator, defaultLocation).build()) {
+
+ if (cmd.hasOption("scenario")) {
+ Scenarios.getScenario(cmd.getOptionValue("scenario")).client(allocator, defaultLocation, client);
+ } else {
+ final String inputPath = cmd.getOptionValue("j");
+ testStream(allocator, defaultLocation, client, inputPath);
+ }
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static void testStream(BufferAllocator allocator, Location server, FlightClient client, String inputPath)
+ throws IOException {
+ // 1. Read data from JSON and upload to server.
+ FlightDescriptor descriptor = FlightDescriptor.path(inputPath);
+ try (JsonFileReader reader = new JsonFileReader(new File(inputPath), allocator);
+ VectorSchemaRoot root = VectorSchemaRoot.create(reader.start(), allocator)) {
+ FlightClient.ClientStreamListener stream = client.startPut(descriptor, root, reader,
+ new AsyncPutListener() {
+ int counter = 0;
+
+ @Override
+ public void onNext(PutResult val) {
+ final byte[] metadataRaw = new byte[checkedCastToInt(val.getApplicationMetadata().readableBytes())];
+ val.getApplicationMetadata().readBytes(metadataRaw);
+ final String metadata = new String(metadataRaw, StandardCharsets.UTF_8);
+ if (!Integer.toString(counter).equals(metadata)) {
+ throw new RuntimeException(
+ String.format("Invalid ACK from server. Expected '%d' but got '%s'.", counter, metadata));
+ }
+ counter++;
+ }
+ });
+ int counter = 0;
+ while (reader.read(root)) {
+ final byte[] rawMetadata = Integer.toString(counter).getBytes(StandardCharsets.UTF_8);
+ final ArrowBuf metadata = allocator.buffer(rawMetadata.length);
+ metadata.writeBytes(rawMetadata);
+ // Transfers ownership of the buffer, so do not release it ourselves
+ stream.putNext(metadata);
+ root.clear();
+ counter++;
+ }
+ stream.completed();
+ // Need to call this, or exceptions from the server get swallowed
+ stream.getResult();
+ }
+
+ // 2. Get the ticket for the data.
+ FlightInfo info = client.getInfo(descriptor);
+ List<FlightEndpoint> endpoints = info.getEndpoints();
+ if (endpoints.isEmpty()) {
+ throw new RuntimeException("No endpoints returned from Flight server.");
+ }
+
+ for (FlightEndpoint endpoint : info.getEndpoints()) {
+ // 3. Download the data from the server.
+ List<Location> locations = endpoint.getLocations();
+ if (locations.isEmpty()) {
+ throw new RuntimeException("No locations returned from Flight server.");
+ }
+ for (Location location : locations) {
+ System.out.println("Verifying location " + location.getUri());
+ try (FlightClient readClient = FlightClient.builder(allocator, location).build();
+ FlightStream stream = readClient.getStream(endpoint.getTicket());
+ VectorSchemaRoot root = stream.getRoot();
+ VectorSchemaRoot downloadedRoot = VectorSchemaRoot.create(root.getSchema(), allocator);
+ JsonFileReader reader = new JsonFileReader(new File(inputPath), allocator)) {
+ VectorLoader loader = new VectorLoader(downloadedRoot);
+ VectorUnloader unloader = new VectorUnloader(root);
+
+ Schema jsonSchema = reader.start();
+ Validator.compareSchemas(root.getSchema(), jsonSchema);
+ try (VectorSchemaRoot jsonRoot = VectorSchemaRoot.create(jsonSchema, allocator)) {
+
+ while (stream.next()) {
+ try (final ArrowRecordBatch arb = unloader.getRecordBatch()) {
+ loader.load(arb);
+ if (reader.read(jsonRoot)) {
+
+ // 4. Validate the data.
+ Validator.compareVectorSchemaRoot(jsonRoot, downloadedRoot);
+ jsonRoot.clear();
+ } else {
+ throw new RuntimeException("Flight stream has more batches than JSON");
+ }
+ }
+ }
+
+ // Verify no more batches with data in JSON
+ // NOTE: Currently the C++ Flight server skips empty batches at end of the stream
+ if (reader.read(jsonRoot) && jsonRoot.getRowCount() > 0) {
+ throw new RuntimeException("JSON has more batches with than Flight stream");
+ }
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestServer.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestServer.java
new file mode 100644
index 000000000..da336c502
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/IntegrationTestServer.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example.integration;
+
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.flight.example.InMemoryStore;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+
+/**
+ * Flight server for integration testing.
+ */
+class IntegrationTestServer {
+ private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(IntegrationTestServer.class);
+ private final Options options;
+
+ private IntegrationTestServer() {
+ options = new Options();
+ options.addOption("port", true, "The port to serve on.");
+ options.addOption("scenario", true, "The integration test scenario.");
+ }
+
+ private void run(String[] args) throws Exception {
+ CommandLineParser parser = new DefaultParser();
+ CommandLine cmd = parser.parse(options, args, false);
+ final int port = Integer.parseInt(cmd.getOptionValue("port", "31337"));
+ final Location location = Location.forGrpcInsecure("localhost", port);
+
+ final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ final FlightServer.Builder builder = FlightServer.builder().allocator(allocator).location(location);
+
+ final FlightServer server;
+ if (cmd.hasOption("scenario")) {
+ final Scenario scenario = Scenarios.getScenario(cmd.getOptionValue("scenario"));
+ scenario.buildServer(builder);
+ server = builder.producer(scenario.producer(allocator, location)).build();
+ server.start();
+ } else {
+ final InMemoryStore store = new InMemoryStore(allocator, location);
+ server = FlightServer.builder(allocator, location, store).build().start();
+ store.setLocation(Location.forGrpcInsecure("localhost", server.getPort()));
+ }
+ // Print out message for integration test script
+ System.out.println("Server listening on localhost:" + server.getPort());
+
+ Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+ try {
+ System.out.println("\nExiting...");
+ AutoCloseables.close(server, allocator);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }));
+
+ server.awaitTermination();
+ }
+
+ public static void main(String[] args) {
+ try {
+ new IntegrationTestServer().run(args);
+ } catch (ParseException e) {
+ fatalError("Error parsing arguments", e);
+ } catch (Exception e) {
+ fatalError("Runtime error", e);
+ }
+ }
+
+ private static void fatalError(String message, Throwable e) {
+ System.err.println(message);
+ System.err.println(e.getMessage());
+ LOGGER.error(message, e);
+ System.exit(1);
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/MiddlewareScenario.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/MiddlewareScenario.java
new file mode 100644
index 000000000..c710ce98b
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/MiddlewareScenario.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example.integration;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.CallInfo;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightClientMiddleware;
+import org.apache.arrow.flight.FlightDescriptor;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightProducer;
+import org.apache.arrow.flight.FlightRuntimeException;
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.FlightServerMiddleware;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.flight.NoOpFlightProducer;
+import org.apache.arrow.flight.RequestContext;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Test an edge case in middleware: gRPC-Java consolidates headers and trailers if a call fails immediately. On the
+ * gRPC implementation side, we need to watch for this, or else we'll have a call with "no headers" if we only look
+ * for headers.
+ */
+final class MiddlewareScenario implements Scenario {
+
+ private static final String HEADER = "x-middleware";
+ private static final String EXPECTED_HEADER_VALUE = "expected value";
+ private static final byte[] COMMAND_SUCCESS = "success".getBytes(StandardCharsets.UTF_8);
+
+ @Override
+ public FlightProducer producer(BufferAllocator allocator, Location location) {
+ return new NoOpFlightProducer() {
+ @Override
+ public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) {
+ if (descriptor.isCommand()) {
+ if (Arrays.equals(COMMAND_SUCCESS, descriptor.getCommand())) {
+ return new FlightInfo(new Schema(Collections.emptyList()), descriptor, Collections.emptyList(), -1, -1);
+ }
+ }
+ throw CallStatus.UNIMPLEMENTED.toRuntimeException();
+ }
+ };
+ }
+
+ @Override
+ public void buildServer(FlightServer.Builder builder) {
+ builder.middleware(FlightServerMiddleware.Key.of("test"), new InjectingServerMiddleware.Factory());
+ }
+
+ @Override
+ public void client(BufferAllocator allocator, Location location, FlightClient ignored) throws Exception {
+ final ExtractingClientMiddleware.Factory factory = new ExtractingClientMiddleware.Factory();
+ try (final FlightClient client = FlightClient.builder(allocator, location).intercept(factory).build()) {
+ // Should fail immediately
+ IntegrationAssertions.assertThrows(FlightRuntimeException.class,
+ () -> client.getInfo(FlightDescriptor.command(new byte[0])));
+ if (!EXPECTED_HEADER_VALUE.equals(factory.extractedHeader)) {
+ throw new AssertionError(
+ "Expected to extract the header value '" +
+ EXPECTED_HEADER_VALUE +
+ "', but found: " +
+ factory.extractedHeader);
+ }
+
+ // Should not fail
+ factory.extractedHeader = "";
+ client.getInfo(FlightDescriptor.command(COMMAND_SUCCESS));
+ if (!EXPECTED_HEADER_VALUE.equals(factory.extractedHeader)) {
+ throw new AssertionError(
+ "Expected to extract the header value '" +
+ EXPECTED_HEADER_VALUE +
+ "', but found: " +
+ factory.extractedHeader);
+ }
+ }
+ }
+
+ /** Middleware that inserts a constant value in outgoing requests. */
+ static class InjectingServerMiddleware implements FlightServerMiddleware {
+
+ private final String headerValue;
+
+ InjectingServerMiddleware(String incoming) {
+ this.headerValue = incoming;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ outgoingHeaders.insert("x-middleware", headerValue);
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+ }
+
+ @Override
+ public void onCallErrored(Throwable err) {
+ }
+
+ /** The factory for the server middleware. */
+ static class Factory implements FlightServerMiddleware.Factory<InjectingServerMiddleware> {
+
+ @Override
+ public InjectingServerMiddleware onCallStarted(CallInfo info, CallHeaders incomingHeaders,
+ RequestContext context) {
+ String incoming = incomingHeaders.get(HEADER);
+ return new InjectingServerMiddleware(incoming == null ? "" : incoming);
+ }
+ }
+ }
+
+ /** Middleware that pulls a value out of incoming responses. */
+ static class ExtractingClientMiddleware implements FlightClientMiddleware {
+
+ private final ExtractingClientMiddleware.Factory factory;
+
+ public ExtractingClientMiddleware(ExtractingClientMiddleware.Factory factory) {
+ this.factory = factory;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ outgoingHeaders.insert(HEADER, EXPECTED_HEADER_VALUE);
+ }
+
+ @Override
+ public void onHeadersReceived(CallHeaders incomingHeaders) {
+ this.factory.extractedHeader = incomingHeaders.get(HEADER);
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+ }
+
+ /** The factory for the client middleware. */
+ static class Factory implements FlightClientMiddleware.Factory {
+
+ String extractedHeader = null;
+
+ @Override
+ public FlightClientMiddleware onCallStarted(CallInfo info) {
+ return new ExtractingClientMiddleware(this);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/Scenario.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/Scenario.java
new file mode 100644
index 000000000..b3b962d2e
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/Scenario.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example.integration;
+
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightProducer;
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.memory.BufferAllocator;
+
+/**
+ * A particular scenario in integration testing.
+ */
+interface Scenario {
+
+ /**
+ * Construct the FlightProducer for a server in this scenario.
+ */
+ FlightProducer producer(BufferAllocator allocator, Location location) throws Exception;
+
+ /**
+ * Set any other server options.
+ */
+ void buildServer(FlightServer.Builder builder) throws Exception;
+
+ /**
+ * Run as the client in the scenario.
+ */
+ void client(BufferAllocator allocator, Location location, FlightClient client) throws Exception;
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/Scenarios.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/Scenarios.java
new file mode 100644
index 000000000..cd9859b4f
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/example/integration/Scenarios.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example.integration;
+
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Supplier;
+
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+
+/**
+ * Scenarios for integration testing.
+ */
+final class Scenarios {
+
+ private static Scenarios INSTANCE;
+
+ private final Map<String, Supplier<Scenario>> scenarios;
+
+ private Scenarios() {
+ scenarios = new TreeMap<>();
+ scenarios.put("auth:basic_proto", AuthBasicProtoScenario::new);
+ scenarios.put("middleware", MiddlewareScenario::new);
+ }
+
+ private static Scenarios getInstance() {
+ if (INSTANCE == null) {
+ INSTANCE = new Scenarios();
+ }
+ return INSTANCE;
+ }
+
+ static Scenario getScenario(String scenario) {
+ final Supplier<Scenario> ctor = getInstance().scenarios.get(scenario);
+ if (ctor == null) {
+ throw new IllegalArgumentException("Unknown integration test scenario: " + scenario);
+ }
+ return ctor.get();
+ }
+
+ // Utility methods for implementing tests.
+
+ public static void main(String[] args) {
+ // Run scenarios one after the other
+ final Location location = Location.forGrpcInsecure("localhost", 31337);
+ for (final Map.Entry<String, Supplier<Scenario>> entry : getInstance().scenarios.entrySet()) {
+ System.out.println("Running test scenario: " + entry.getKey());
+ final Scenario scenario = entry.getValue().get();
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) {
+ final FlightServer.Builder builder = FlightServer
+ .builder(allocator, location, scenario.producer(allocator, location));
+ scenario.buildServer(builder);
+ try (final FlightServer server = builder.build()) {
+ server.start();
+
+ try (final FlightClient client = FlightClient.builder(allocator, location).build()) {
+ scenario.client(allocator, location, client);
+ }
+
+ server.shutdown();
+ server.awaitTermination(1, TimeUnit.SECONDS);
+ System.out.println("Ran scenario " + entry.getKey());
+ }
+ } catch (Exception e) {
+ System.out.println("Exception while running scenario " + entry.getKey());
+ e.printStackTrace();
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java
new file mode 100644
index 000000000..26e0274fa
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.List;
+
+import io.netty.buffer.ByteBuf;
+
+/**
+ * Allow a user to add a ByteBuf based InputStream directly into GRPC WritableBuffer to avoid an
+ * extra copy. This could be solved in GRPC by adding a ByteBufListable interface on InputStream and
+ * letting BufferChainOutputStream take advantage of it.
+ */
+public class AddWritableBuffer {
+
+ private static final Constructor<?> bufConstruct;
+ private static final Field bufferList;
+ private static final Field current;
+ private static final Method listAdd;
+ private static final Class<?> bufChainOut;
+
+ static {
+
+ Constructor<?> tmpConstruct = null;
+ Field tmpBufferList = null;
+ Field tmpCurrent = null;
+ Class<?> tmpBufChainOut = null;
+ Method tmpListAdd = null;
+
+ try {
+ Class<?> nwb = Class.forName("io.grpc.netty.NettyWritableBuffer");
+
+ Constructor<?> tmpConstruct2 = nwb.getDeclaredConstructor(ByteBuf.class);
+ tmpConstruct2.setAccessible(true);
+
+ Class<?> tmpBufChainOut2 = Class.forName("io.grpc.internal.MessageFramer$BufferChainOutputStream");
+
+ Field tmpBufferList2 = tmpBufChainOut2.getDeclaredField("bufferList");
+ tmpBufferList2.setAccessible(true);
+
+ Field tmpCurrent2 = tmpBufChainOut2.getDeclaredField("current");
+ tmpCurrent2.setAccessible(true);
+
+ Method tmpListAdd2 = List.class.getDeclaredMethod("add", Object.class);
+
+ // output fields last.
+ tmpConstruct = tmpConstruct2;
+ tmpBufferList = tmpBufferList2;
+ tmpCurrent = tmpCurrent2;
+ tmpListAdd = tmpListAdd2;
+ tmpBufChainOut = tmpBufChainOut2;
+
+ } catch (Exception ex) {
+ ex.printStackTrace();
+ }
+
+ bufConstruct = tmpConstruct;
+ bufferList = tmpBufferList;
+ current = tmpCurrent;
+ listAdd = tmpListAdd;
+ bufChainOut = tmpBufChainOut;
+
+ }
+
+ /**
+ * Add the provided ByteBuf to the gRPC BufferChainOutputStream if possible, else copy the buffer to the stream.
+ * @param buf The buffer to add.
+ * @param stream The Candidate OutputStream to add to.
+ * @param tryZeroCopy If true, try to zero-copy append the buffer to the stream. This may not succeed.
+ * @return True if buffer was zero-copy added to the stream. False if the buffer was copied.
+ * @throws IOException if the fast path is not enabled and there was an error copying the buffer to the stream.
+ */
+ public static boolean add(ByteBuf buf, OutputStream stream, boolean tryZeroCopy) throws IOException {
+ if (!tryZeroCopy || !tryAddBuffer(buf, stream)) {
+ buf.getBytes(0, stream, buf.readableBytes());
+ return false;
+ }
+ return true;
+ }
+
+ private static boolean tryAddBuffer(ByteBuf buf, OutputStream stream) throws IOException {
+
+ if (bufChainOut == null) {
+ return false;
+ }
+
+ if (!stream.getClass().equals(bufChainOut)) {
+ return false;
+ }
+
+ try {
+ if (current.get(stream) != null) {
+ return false;
+ }
+
+ buf.retain();
+ Object obj = bufConstruct.newInstance(buf);
+ Object list = bufferList.get(stream);
+ listAdd.invoke(list, obj);
+ current.set(stream, obj);
+ return true;
+ } catch (IllegalAccessException | IllegalArgumentException | InvocationTargetException | InstantiationException e) {
+ e.printStackTrace();
+ return false;
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CallCredentialAdapter.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CallCredentialAdapter.java
new file mode 100644
index 000000000..285ddb9ba
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CallCredentialAdapter.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.util.concurrent.Executor;
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.CallHeaders;
+
+import io.grpc.CallCredentials;
+import io.grpc.Metadata;
+
+/**
+ * Adapter class to utilize a CredentialWriter to implement Grpc CallCredentials.
+ */
+public class CallCredentialAdapter extends CallCredentials {
+
+ private final Consumer<CallHeaders> credentialWriter;
+
+ public CallCredentialAdapter(Consumer<CallHeaders> credentialWriter) {
+ this.credentialWriter = credentialWriter;
+ }
+
+ @Override
+ public void applyRequestMetadata(RequestInfo requestInfo, Executor executor, MetadataApplier metadataApplier) {
+ executor.execute(() ->
+ {
+ final Metadata headers = new Metadata();
+ credentialWriter.accept(new MetadataAdapter(headers));
+ metadataApplier.apply(headers);
+ });
+ }
+
+ @Override
+ public void thisUsesUnstableApi() {
+ // Mandatory to override this to acknowledge that CallCredentials is Experimental.
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ClientInterceptorAdapter.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ClientInterceptorAdapter.java
new file mode 100644
index 000000000..ae11e5260
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ClientInterceptorAdapter.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.flight.CallInfo;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightClientMiddleware;
+import org.apache.arrow.flight.FlightClientMiddleware.Factory;
+import org.apache.arrow.flight.FlightMethod;
+import org.apache.arrow.flight.FlightRuntimeException;
+import org.apache.arrow.flight.FlightStatusCode;
+
+import io.grpc.CallOptions;
+import io.grpc.Channel;
+import io.grpc.ClientCall;
+import io.grpc.ClientInterceptor;
+import io.grpc.ForwardingClientCall.SimpleForwardingClientCall;
+import io.grpc.ForwardingClientCallListener.SimpleForwardingClientCallListener;
+import io.grpc.Metadata;
+import io.grpc.MethodDescriptor;
+import io.grpc.Status;
+import io.grpc.StatusRuntimeException;
+
+/**
+ * An adapter between Flight client middleware and gRPC interceptors.
+ *
+ * <p>This is implemented as a single gRPC interceptor that runs all Flight client middleware sequentially.
+ */
+public class ClientInterceptorAdapter implements ClientInterceptor {
+
+ private final List<Factory> factories;
+
+ public ClientInterceptorAdapter(List<Factory> factories) {
+ this.factories = factories;
+ }
+
+ @Override
+ public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(MethodDescriptor<ReqT, RespT> method,
+ CallOptions callOptions, Channel next) {
+ final List<FlightClientMiddleware> middleware = new ArrayList<>();
+ final CallInfo info = new CallInfo(FlightMethod.fromProtocol(method.getFullMethodName()));
+
+ try {
+ for (final Factory factory : factories) {
+ middleware.add(factory.onCallStarted(info));
+ }
+ } catch (FlightRuntimeException e) {
+ // Explicitly propagate
+ throw e;
+ } catch (StatusRuntimeException e) {
+ throw StatusUtils.fromGrpcRuntimeException(e);
+ } catch (RuntimeException e) {
+ throw StatusUtils.fromThrowable(e);
+ }
+ return new FlightClientCall<>(next.newCall(method, callOptions), middleware);
+ }
+
+ /**
+ * The ClientCallListener which hooks into the gRPC request cycle and actually runs middleware at certain points.
+ */
+ private static class FlightClientCallListener<RespT> extends SimpleForwardingClientCallListener<RespT> {
+
+ private final List<FlightClientMiddleware> middleware;
+ boolean receivedHeaders;
+
+ public FlightClientCallListener(ClientCall.Listener<RespT> responseListener,
+ List<FlightClientMiddleware> middleware) {
+ super(responseListener);
+ this.middleware = middleware;
+ receivedHeaders = false;
+ }
+
+ @Override
+ public void onHeaders(Metadata headers) {
+ receivedHeaders = true;
+ final MetadataAdapter adapter = new MetadataAdapter(headers);
+ try {
+ middleware.forEach(m -> m.onHeadersReceived(adapter));
+ } finally {
+ // Make sure to always call the gRPC callback to avoid interrupting the gRPC request cycle
+ super.onHeaders(headers);
+ }
+ }
+
+ @Override
+ public void onClose(Status status, Metadata trailers) {
+ try {
+ if (!receivedHeaders) {
+ // gRPC doesn't always send response headers if the call errors or completes immediately, but instead
+ // consolidates them with the trailers. If we never got headers, assume this happened and run the header
+ // callback with the trailers.
+ final MetadataAdapter adapter = new MetadataAdapter(trailers);
+ middleware.forEach(m -> m.onHeadersReceived(adapter));
+ }
+ final CallStatus flightStatus = StatusUtils.fromGrpcStatusAndTrailers(status, trailers);
+ middleware.forEach(m -> m.onCallCompleted(flightStatus));
+ } finally {
+ // Make sure to always call the gRPC callback to avoid interrupting the gRPC request cycle
+ super.onClose(status, trailers);
+ }
+ }
+ }
+
+ /**
+ * The gRPC ClientCall which hooks into the gRPC request cycle and injects our ClientCallListener.
+ */
+ private static class FlightClientCall<ReqT, RespT> extends SimpleForwardingClientCall<ReqT, RespT> {
+
+ private final List<FlightClientMiddleware> middleware;
+
+ public FlightClientCall(ClientCall<ReqT, RespT> clientCall, List<FlightClientMiddleware> middleware) {
+ super(clientCall);
+ this.middleware = middleware;
+ }
+
+ @Override
+ public void start(Listener<RespT> responseListener, Metadata headers) {
+ final MetadataAdapter metadataAdapter = new MetadataAdapter(headers);
+ middleware.forEach(m -> m.onBeforeSendingHeaders(metadataAdapter));
+
+ super.start(new FlightClientCallListener<>(responseListener, middleware), headers);
+ }
+
+ @Override
+ public void cancel(String message, Throwable cause) {
+ final CallStatus flightStatus = new CallStatus(FlightStatusCode.CANCELLED, cause, message, null);
+ middleware.forEach(m -> m.onCallCompleted(flightStatus));
+ super.cancel(message, cause);
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ContextPropagatingExecutorService.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ContextPropagatingExecutorService.java
new file mode 100644
index 000000000..8f6bb6db2
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ContextPropagatingExecutorService.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.stream.Collectors;
+
+import io.grpc.Context;
+
+/**
+ * An {@link ExecutorService} that propagates the {@link Context}.
+ *
+ * <p>Context is used to propagate per-call state, like the authenticated user, between threads (as gRPC makes no
+ * guarantees about what thread things execute on). This wrapper makes it easy to preserve this when using an Executor.
+ * The Context itself is immutable, so it is thread-safe.
+ */
+public class ContextPropagatingExecutorService implements ExecutorService {
+
+ private final ExecutorService delegate;
+
+ public ContextPropagatingExecutorService(ExecutorService delegate) {
+ this.delegate = delegate;
+ }
+
+ // These are just delegate methods.
+
+ @Override
+ public void shutdown() {
+ delegate.shutdown();
+ }
+
+ @Override
+ public List<Runnable> shutdownNow() {
+ return delegate.shutdownNow();
+ }
+
+ @Override
+ public boolean isShutdown() {
+ return delegate.isShutdown();
+ }
+
+ @Override
+ public boolean isTerminated() {
+ return delegate.isTerminated();
+ }
+
+ @Override
+ public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException {
+ return delegate.awaitTermination(timeout, unit);
+ }
+
+ // These are delegate methods that wrap the submitted task in the current gRPC Context.
+
+ @Override
+ public <T> Future<T> submit(Callable<T> task) {
+ return delegate.submit(Context.current().wrap(task));
+ }
+
+ @Override
+ public <T> Future<T> submit(Runnable task, T result) {
+ return delegate.submit(Context.current().wrap(task), result);
+ }
+
+ @Override
+ public Future<?> submit(Runnable task) {
+ return delegate.submit(Context.current().wrap(task));
+ }
+
+ @Override
+ public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks) throws InterruptedException {
+ return delegate.invokeAll(tasks.stream().map(Context.current()::wrap).collect(Collectors.toList()));
+ }
+
+ @Override
+ public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks, long timeout,
+ TimeUnit unit) throws InterruptedException {
+ return delegate.invokeAll(tasks.stream().map(Context.current()::wrap).collect(Collectors.toList()), timeout, unit);
+ }
+
+ @Override
+ public <T> T invokeAny(Collection<? extends Callable<T>> tasks) throws InterruptedException, ExecutionException {
+ return delegate.invokeAny(tasks.stream().map(Context.current()::wrap).collect(Collectors.toList()));
+ }
+
+ @Override
+ public <T> T invokeAny(Collection<? extends Callable<T>> tasks, long timeout, TimeUnit unit)
+ throws InterruptedException, ExecutionException, TimeoutException {
+ return delegate.invokeAny(tasks.stream().map(Context.current()::wrap).collect(Collectors.toList()), timeout, unit);
+ }
+
+ @Override
+ public void execute(Runnable command) {
+ delegate.execute(Context.current().wrap(command));
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CredentialCallOption.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CredentialCallOption.java
new file mode 100644
index 000000000..3bde7a835
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CredentialCallOption.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.CallOptions;
+
+import io.grpc.stub.AbstractStub;
+
+/**
+ * Method option for supplying credentials to method calls.
+ */
+public class CredentialCallOption implements CallOptions.GrpcCallOption {
+ private final Consumer<CallHeaders> credentialWriter;
+
+ public CredentialCallOption(Consumer<CallHeaders> credentialWriter) {
+ this.credentialWriter = credentialWriter;
+ }
+
+ @Override
+ public <T extends AbstractStub<T>> T wrapStub(T stub) {
+ return stub.withCallCredentials(new CallCredentialAdapter(credentialWriter));
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java
new file mode 100644
index 000000000..5f8a71576
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Field;
+
+import org.apache.arrow.memory.ArrowBuf;
+
+import com.google.common.base.Throwables;
+import com.google.common.io.ByteStreams;
+
+import io.grpc.internal.ReadableBuffer;
+
+/**
+ * Enable access to ReadableBuffer directly to copy data from a BufferInputStream into a target
+ * ByteBuffer/ByteBuf.
+ *
+ * <p>This could be solved by BufferInputStream exposing Drainable.
+ */
+public class GetReadableBuffer {
+
+ private static final Field READABLE_BUFFER;
+ private static final Class<?> BUFFER_INPUT_STREAM;
+
+ static {
+ Field tmpField = null;
+ Class<?> tmpClazz = null;
+ try {
+ Class<?> clazz = Class.forName("io.grpc.internal.ReadableBuffers$BufferInputStream");
+
+ Field f = clazz.getDeclaredField("buffer");
+ f.setAccessible(true);
+ // don't set until we've gotten past all exception cases.
+ tmpField = f;
+ tmpClazz = clazz;
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ READABLE_BUFFER = tmpField;
+ BUFFER_INPUT_STREAM = tmpClazz;
+ }
+
+ /**
+ * Extracts the ReadableBuffer for the given input stream.
+ *
+ * @param is Must be an instance of io.grpc.internal.ReadableBuffers$BufferInputStream or
+ * null will be returned.
+ */
+ public static ReadableBuffer getReadableBuffer(InputStream is) {
+
+ if (BUFFER_INPUT_STREAM == null || !is.getClass().equals(BUFFER_INPUT_STREAM)) {
+ return null;
+ }
+
+ try {
+ return (ReadableBuffer) READABLE_BUFFER.get(is);
+ } catch (Exception ex) {
+ throw Throwables.propagate(ex);
+ }
+ }
+
+ /**
+ * Helper method to read a gRPC-provided InputStream into an ArrowBuf.
+ * @param stream The stream to read from. Should be an instance of {@link #BUFFER_INPUT_STREAM}.
+ * @param buf The buffer to read into.
+ * @param size The number of bytes to read.
+ * @param fastPath Whether to enable the fast path (i.e. detect whether the stream is a {@link #BUFFER_INPUT_STREAM}).
+ * @throws IOException if there is an error reading form the stream
+ */
+ public static void readIntoBuffer(final InputStream stream, final ArrowBuf buf, final int size,
+ final boolean fastPath) throws IOException {
+ ReadableBuffer readableBuffer = fastPath ? getReadableBuffer(stream) : null;
+ if (readableBuffer != null) {
+ readableBuffer.readBytes(buf.nioBuffer(0, size));
+ } else {
+ byte[] heapBytes = new byte[size];
+ ByteStreams.readFully(stream, heapBytes);
+ buf.writeBytes(heapBytes);
+ }
+ buf.writerIndex(size);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/MetadataAdapter.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/MetadataAdapter.java
new file mode 100644
index 000000000..4327f0ca8
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/MetadataAdapter.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import org.apache.arrow.flight.CallHeaders;
+
+import io.grpc.Metadata;
+import io.grpc.Metadata.Key;
+
+/**
+ * A mutable adapter between the gRPC Metadata object and the Flight headers interface.
+ *
+ * <p>This allows us to present the headers (metadata) from gRPC without copying to/from our own object.
+ */
+public class MetadataAdapter implements CallHeaders {
+
+ private final Metadata metadata;
+
+ public MetadataAdapter(Metadata metadata) {
+ this.metadata = metadata;
+ }
+
+ @Override
+ public String get(String key) {
+ return this.metadata.get(Key.of(key, Metadata.ASCII_STRING_MARSHALLER));
+ }
+
+ @Override
+ public byte[] getByte(String key) {
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ return this.metadata.get(Key.of(key, Metadata.BINARY_BYTE_MARSHALLER));
+ }
+ return get(key).getBytes();
+ }
+
+ @Override
+ public Iterable<String> getAll(String key) {
+ return this.metadata.getAll(Key.of(key, Metadata.ASCII_STRING_MARSHALLER));
+ }
+
+ @Override
+ public Iterable<byte[]> getAllByte(String key) {
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ return this.metadata.getAll(Key.of(key, Metadata.BINARY_BYTE_MARSHALLER));
+ }
+ return StreamSupport.stream(getAll(key).spliterator(), false)
+ .map(String::getBytes).collect(Collectors.toList());
+ }
+
+ @Override
+ public void insert(String key, String value) {
+ this.metadata.put(Key.of(key, Metadata.ASCII_STRING_MARSHALLER), value);
+ }
+
+ @Override
+ public void insert(String key, byte[] value) {
+ this.metadata.put(Key.of(key, Metadata.BINARY_BYTE_MARSHALLER), value);
+ }
+
+ @Override
+ public Set<String> keys() {
+ return new HashSet<>(this.metadata.keys());
+ }
+
+ @Override
+ public boolean containsKey(String key) {
+ if (key.endsWith("-bin")) {
+ final Key<?> grpcKey = Key.of(key, Metadata.BINARY_BYTE_MARSHALLER);
+ return this.metadata.containsKey(grpcKey);
+ }
+ final Key<?> grpcKey = Key.of(key, Metadata.ASCII_STRING_MARSHALLER);
+ return this.metadata.containsKey(grpcKey);
+ }
+
+ public String toString() {
+ return this.metadata.toString();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/RequestContextAdapter.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/RequestContextAdapter.java
new file mode 100644
index 000000000..9be4d12b9
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/RequestContextAdapter.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.util.HashMap;
+import java.util.Set;
+
+import org.apache.arrow.flight.RequestContext;
+
+import io.grpc.Context;
+
+
+/**
+ * Adapter for holding key value pairs.
+ */
+public class RequestContextAdapter implements RequestContext {
+ public static final Context.Key<RequestContext> REQUEST_CONTEXT_KEY =
+ Context.key("arrow-flight-request-context");
+ private final HashMap<String, String> map = new HashMap<>();
+
+ @Override
+ public void put(String key, String value) {
+ if (map.putIfAbsent(key, value) != null) {
+ throw new IllegalArgumentException("Duplicate write to a RequestContext at key " + key + " not allowed.");
+ }
+ }
+
+ @Override
+ public String get(String key) {
+ return map.get(key);
+ }
+
+ @Override
+ public Set<String> keySet() {
+ return map.keySet();
+ }
+
+ @Override
+ public String remove(String key) {
+ return map.remove(key);
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerInterceptorAdapter.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerInterceptorAdapter.java
new file mode 100644
index 000000000..ddf43ff84
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerInterceptorAdapter.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.flight.CallInfo;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightMethod;
+import org.apache.arrow.flight.FlightProducer.CallContext;
+import org.apache.arrow.flight.FlightRuntimeException;
+import org.apache.arrow.flight.FlightServerMiddleware;
+import org.apache.arrow.flight.FlightServerMiddleware.Factory;
+import org.apache.arrow.flight.FlightServerMiddleware.Key;
+
+import io.grpc.Context;
+import io.grpc.Contexts;
+import io.grpc.ForwardingServerCall.SimpleForwardingServerCall;
+import io.grpc.Metadata;
+import io.grpc.ServerCall;
+import io.grpc.ServerCall.Listener;
+import io.grpc.ServerCallHandler;
+import io.grpc.ServerInterceptor;
+import io.grpc.Status;
+
+/**
+ * An adapter between Flight middleware and a gRPC interceptor.
+ *
+ * <p>This is implemented as a single gRPC interceptor that runs all Flight server middleware sequentially. Flight
+ * middleware instances are stored in the gRPC Context so their state is accessible later.
+ */
+public class ServerInterceptorAdapter implements ServerInterceptor {
+
+ /**
+ * A combination of a middleware Key and factory.
+ *
+ * @param <T> The middleware type.
+ */
+ public static class KeyFactory<T extends FlightServerMiddleware> {
+
+ private final FlightServerMiddleware.Key<T> key;
+ private final FlightServerMiddleware.Factory<T> factory;
+
+ public KeyFactory(Key<T> key, Factory<T> factory) {
+ this.key = key;
+ this.factory = factory;
+ }
+ }
+
+ /**
+ * The {@link Context.Key} that stores the Flight middleware active for a particular call.
+ *
+ * <p>Applications should not use this directly. Instead, see {@link CallContext#getMiddleware(Key)}.
+ */
+ public static final Context.Key<Map<FlightServerMiddleware.Key<?>, FlightServerMiddleware>> SERVER_MIDDLEWARE_KEY =
+ Context.key("arrow.flight.server_middleware");
+ private final List<KeyFactory<?>> factories;
+
+ public ServerInterceptorAdapter(List<KeyFactory<?>> factories) {
+ this.factories = factories;
+ }
+
+ @Override
+ public <ReqT, RespT> Listener<ReqT> interceptCall(ServerCall<ReqT, RespT> call, Metadata headers,
+ ServerCallHandler<ReqT, RespT> next) {
+ final CallInfo info = new CallInfo(FlightMethod.fromProtocol(call.getMethodDescriptor().getFullMethodName()));
+ final List<FlightServerMiddleware> middleware = new ArrayList<>();
+ // Use LinkedHashMap to preserve insertion order
+ final Map<FlightServerMiddleware.Key<?>, FlightServerMiddleware> middlewareMap = new LinkedHashMap<>();
+ final MetadataAdapter headerAdapter = new MetadataAdapter(headers);
+ final RequestContextAdapter requestContextAdapter = new RequestContextAdapter();
+ for (final KeyFactory<?> factory : factories) {
+ final FlightServerMiddleware m;
+ try {
+ m = factory.factory.onCallStarted(info, headerAdapter, requestContextAdapter);
+ } catch (FlightRuntimeException e) {
+ // Cancel call
+ call.close(StatusUtils.toGrpcStatus(e.status()), new Metadata());
+ return new Listener<ReqT>() {};
+ }
+ middleware.add(m);
+ middlewareMap.put(factory.key, m);
+ }
+
+ // Inject the middleware into the context so RPC method implementations can communicate with middleware instances
+ final Context contextWithMiddlewareAndRequestsOptions = Context.current()
+ .withValue(SERVER_MIDDLEWARE_KEY, Collections.unmodifiableMap(middlewareMap))
+ .withValue(RequestContextAdapter.REQUEST_CONTEXT_KEY, requestContextAdapter);
+
+ final SimpleForwardingServerCall<ReqT, RespT> forwardingServerCall = new SimpleForwardingServerCall<ReqT, RespT>(
+ call) {
+ boolean sentHeaders = false;
+
+ @Override
+ public void sendHeaders(Metadata headers) {
+ sentHeaders = true;
+ try {
+ final MetadataAdapter headerAdapter = new MetadataAdapter(headers);
+ middleware.forEach(m -> m.onBeforeSendingHeaders(headerAdapter));
+ } finally {
+ // Make sure to always call the gRPC callback to avoid interrupting the gRPC request cycle
+ super.sendHeaders(headers);
+ }
+ }
+
+ @Override
+ public void close(Status status, Metadata trailers) {
+ try {
+ if (!sentHeaders) {
+ // gRPC doesn't always send response headers if the call errors or completes immediately
+ final MetadataAdapter headerAdapter = new MetadataAdapter(trailers);
+ middleware.forEach(m -> m.onBeforeSendingHeaders(headerAdapter));
+ }
+ } finally {
+ // Make sure to always call the gRPC callback to avoid interrupting the gRPC request cycle
+ super.close(status, trailers);
+ }
+
+ final CallStatus flightStatus = StatusUtils.fromGrpcStatus(status);
+ middleware.forEach(m -> m.onCallCompleted(flightStatus));
+ }
+ };
+ return Contexts.interceptCall(contextWithMiddlewareAndRequestsOptions, forwardingServerCall, headers, next);
+
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java
new file mode 100644
index 000000000..55e841864
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import java.util.Iterator;
+import java.util.Objects;
+import java.util.function.Function;
+
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.ErrorFlightMetadata;
+import org.apache.arrow.flight.FlightRuntimeException;
+import org.apache.arrow.flight.FlightStatusCode;
+
+import io.grpc.InternalMetadata;
+import io.grpc.Metadata;
+import io.grpc.Status;
+import io.grpc.Status.Code;
+import io.grpc.StatusException;
+import io.grpc.StatusRuntimeException;
+
+/**
+ * Utilities to adapt gRPC and Flight status objects.
+ *
+ * <p>NOT A PUBLIC CLASS, interface is not guaranteed to remain stable.
+ */
+public class StatusUtils {
+
+ private StatusUtils() {
+ throw new AssertionError("Do not instantiate this class.");
+ }
+
+ /**
+ * Convert from a Flight status code to a gRPC status code.
+ */
+ public static Status.Code toGrpcStatusCode(FlightStatusCode code) {
+ switch (code) {
+ case OK:
+ return Code.OK;
+ case UNKNOWN:
+ return Code.UNKNOWN;
+ case INTERNAL:
+ return Code.INTERNAL;
+ case INVALID_ARGUMENT:
+ return Code.INVALID_ARGUMENT;
+ case TIMED_OUT:
+ return Code.DEADLINE_EXCEEDED;
+ case NOT_FOUND:
+ return Code.NOT_FOUND;
+ case ALREADY_EXISTS:
+ return Code.ALREADY_EXISTS;
+ case CANCELLED:
+ return Code.CANCELLED;
+ case UNAUTHENTICATED:
+ return Code.UNAUTHENTICATED;
+ case UNAUTHORIZED:
+ return Code.PERMISSION_DENIED;
+ case UNIMPLEMENTED:
+ return Code.UNIMPLEMENTED;
+ case UNAVAILABLE:
+ return Code.UNAVAILABLE;
+ default:
+ return Code.UNKNOWN;
+ }
+ }
+
+ /**
+ * Convert from a gRPC status code to a Flight status code.
+ */
+ public static FlightStatusCode fromGrpcStatusCode(Status.Code code) {
+ switch (code) {
+ case OK:
+ return FlightStatusCode.OK;
+ case CANCELLED:
+ return FlightStatusCode.CANCELLED;
+ case UNKNOWN:
+ return FlightStatusCode.UNKNOWN;
+ case INVALID_ARGUMENT:
+ return FlightStatusCode.INVALID_ARGUMENT;
+ case DEADLINE_EXCEEDED:
+ return FlightStatusCode.TIMED_OUT;
+ case NOT_FOUND:
+ return FlightStatusCode.NOT_FOUND;
+ case ALREADY_EXISTS:
+ return FlightStatusCode.ALREADY_EXISTS;
+ case PERMISSION_DENIED:
+ return FlightStatusCode.UNAUTHORIZED;
+ case RESOURCE_EXHAUSTED:
+ return FlightStatusCode.INVALID_ARGUMENT;
+ case FAILED_PRECONDITION:
+ return FlightStatusCode.INVALID_ARGUMENT;
+ case ABORTED:
+ return FlightStatusCode.INTERNAL;
+ case OUT_OF_RANGE:
+ return FlightStatusCode.INVALID_ARGUMENT;
+ case UNIMPLEMENTED:
+ return FlightStatusCode.UNIMPLEMENTED;
+ case INTERNAL:
+ return FlightStatusCode.INTERNAL;
+ case UNAVAILABLE:
+ return FlightStatusCode.UNAVAILABLE;
+ case DATA_LOSS:
+ return FlightStatusCode.INTERNAL;
+ case UNAUTHENTICATED:
+ return FlightStatusCode.UNAUTHENTICATED;
+ default:
+ return FlightStatusCode.UNKNOWN;
+ }
+ }
+
+ /** Create Metadata Key for binary metadata. */
+ static Metadata.Key<byte[]> keyOfBinary(String name) {
+ return Metadata.Key.of(name, Metadata.BINARY_BYTE_MARSHALLER);
+ }
+
+ /** Create Metadata Key for ascii metadata. */
+ static Metadata.Key<String> keyOfAscii(String name) {
+ // Use InternalMetadata for keys that start with ":", e.g. ":status". See ARROW-14014.
+ return InternalMetadata.keyOf(name, Metadata.ASCII_STRING_MARSHALLER);
+ }
+
+ /** Convert from a gRPC Status & trailers to a Flight status. */
+ public static CallStatus fromGrpcStatusAndTrailers(Status status, Metadata trailers) {
+ // gRPC may not always have trailers - this happens when the server internally generates an error, which is rare,
+ // but can happen.
+ final ErrorFlightMetadata errorMetadata = trailers == null ? null : parseTrailers(trailers);
+ return new CallStatus(
+ fromGrpcStatusCode(status.getCode()),
+ status.getCause(),
+ status.getDescription(),
+ errorMetadata);
+ }
+
+ /** Convert from a gRPC status to a Flight status. */
+ public static CallStatus fromGrpcStatus(Status status) {
+ return new CallStatus(
+ fromGrpcStatusCode(status.getCode()),
+ status.getCause(),
+ status.getDescription(),
+ null);
+ }
+
+ /** Convert from a Flight status to a gRPC status. */
+ public static Status toGrpcStatus(CallStatus status) {
+ return toGrpcStatusCode(status.code()).toStatus().withDescription(status.description()).withCause(status.cause());
+ }
+
+ /** Convert from a gRPC exception to a Flight exception. */
+ public static FlightRuntimeException fromGrpcRuntimeException(StatusRuntimeException sre) {
+ return fromGrpcStatusAndTrailers(sre.getStatus(), sre.getTrailers()).toRuntimeException();
+ }
+
+ /** Convert gRPC trailers into Flight error metadata. */
+ private static ErrorFlightMetadata parseTrailers(Metadata trailers) {
+ ErrorFlightMetadata metadata = new ErrorFlightMetadata();
+ for (String key : trailers.keys()) {
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ metadata.insert(key, trailers.get(keyOfBinary(key)));
+ } else {
+ metadata.insert(key, Objects.requireNonNull(trailers.get(keyOfAscii(key))).getBytes());
+ }
+ }
+ return metadata;
+ }
+
+ /**
+ * Convert arbitrary exceptions to a {@link FlightRuntimeException}.
+ */
+ public static FlightRuntimeException fromThrowable(Throwable t) {
+ if (t instanceof StatusRuntimeException) {
+ return fromGrpcRuntimeException((StatusRuntimeException) t);
+ } else if (t instanceof FlightRuntimeException) {
+ return (FlightRuntimeException) t;
+ }
+ return CallStatus.UNKNOWN.withCause(t).withDescription(t.getMessage()).toRuntimeException();
+ }
+
+ /**
+ * Convert arbitrary exceptions to a {@link StatusRuntimeException} or {@link StatusException}.
+ *
+ * <p>Such exceptions can be passed to {@link io.grpc.stub.StreamObserver#onError(Throwable)} and will give the client
+ * a reasonable error message.
+ */
+ public static Throwable toGrpcException(Throwable ex) {
+ if (ex instanceof StatusRuntimeException) {
+ return ex;
+ } else if (ex instanceof StatusException) {
+ return ex;
+ } else if (ex instanceof FlightRuntimeException) {
+ final FlightRuntimeException fre = (FlightRuntimeException) ex;
+ if (fre.status().metadata() != null) {
+ Metadata trailers = toGrpcMetadata(fre.status().metadata());
+ return new StatusRuntimeException(toGrpcStatus(fre.status()), trailers);
+ }
+ return toGrpcStatus(fre.status()).asRuntimeException();
+ }
+ return Status.INTERNAL.withCause(ex).withDescription("There was an error servicing your request.")
+ .asRuntimeException();
+ }
+
+ private static Metadata toGrpcMetadata(ErrorFlightMetadata metadata) {
+ final Metadata trailers = new Metadata();
+ for (final String key : metadata.keys()) {
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ trailers.put(keyOfBinary(key), metadata.getByte(key));
+ } else {
+ trailers.put(keyOfAscii(key), metadata.get(key));
+ }
+ }
+ return trailers;
+ }
+
+ /**
+ * Maps a transformation function to the elements of an iterator, while wrapping exceptions in {@link
+ * FlightRuntimeException}.
+ */
+ public static <FROM, TO> Iterator<TO> wrapIterator(Iterator<FROM> fromIterator,
+ Function<? super FROM, ? extends TO> transformer) {
+ Objects.requireNonNull(fromIterator);
+ Objects.requireNonNull(transformer);
+ return new Iterator<TO>() {
+ @Override
+ public boolean hasNext() {
+ try {
+ return fromIterator.hasNext();
+ } catch (StatusRuntimeException e) {
+ throw fromGrpcRuntimeException(e);
+ }
+ }
+
+ @Override
+ public TO next() {
+ try {
+ return transformer.apply(fromIterator.next());
+ } catch (StatusRuntimeException e) {
+ throw fromGrpcRuntimeException(e);
+ }
+ }
+ };
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/FlightTestUtil.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/FlightTestUtil.java
new file mode 100644
index 000000000..cd043b639
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/FlightTestUtil.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+import java.util.Random;
+import java.util.function.Function;
+
+import org.junit.Assert;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.function.Executable;
+
+/**
+ * Utility methods and constants for testing flight servers.
+ */
+public class FlightTestUtil {
+
+ private static final Random RANDOM = new Random();
+
+ public static final String LOCALHOST = "localhost";
+ public static final String TEST_DATA_ENV_VAR = "ARROW_TEST_DATA";
+ public static final String TEST_DATA_PROPERTY = "arrow.test.dataRoot";
+
+ /**
+ * Returns a a FlightServer (actually anything that is startable)
+ * that has been started bound to a random port.
+ */
+ public static <T> T getStartedServer(Function<Location, T> newServerFromLocation) throws IOException {
+ IOException lastThrown = null;
+ T server = null;
+ for (int x = 0; x < 3; x++) {
+ final int port = 49152 + RANDOM.nextInt(5000);
+ final Location location = Location.forGrpcInsecure(LOCALHOST, port);
+ lastThrown = null;
+ try {
+ server = newServerFromLocation.apply(location);
+ try {
+ server.getClass().getMethod("start").invoke(server);
+ } catch (NoSuchMethodException | IllegalAccessException e) {
+ throw new IllegalArgumentException("Couldn't call start method on object.", e);
+ }
+ break;
+ } catch (InvocationTargetException e) {
+ if (e.getTargetException() instanceof IOException) {
+ lastThrown = (IOException) e.getTargetException();
+ } else {
+ throw (RuntimeException) e.getTargetException();
+ }
+ }
+ }
+ if (lastThrown != null) {
+ throw lastThrown;
+ }
+ return server;
+ }
+
+ static Path getTestDataRoot() {
+ String path = System.getenv(TEST_DATA_ENV_VAR);
+ if (path == null) {
+ path = System.getProperty(TEST_DATA_PROPERTY);
+ }
+ return Paths.get(Objects.requireNonNull(path,
+ String.format("Could not find test data path. Set the environment variable %s or the JVM property %s.",
+ TEST_DATA_ENV_VAR, TEST_DATA_PROPERTY)));
+ }
+
+ static Path getFlightTestDataRoot() {
+ return getTestDataRoot().resolve("flight");
+ }
+
+ static Path exampleTlsRootCert() {
+ return getFlightTestDataRoot().resolve("root-ca.pem");
+ }
+
+ static List<CertKeyPair> exampleTlsCerts() {
+ final Path root = getFlightTestDataRoot();
+ return Arrays.asList(new CertKeyPair(root.resolve("cert0.pem").toFile(), root.resolve("cert0.pkcs1").toFile()),
+ new CertKeyPair(root.resolve("cert1.pem").toFile(), root.resolve("cert1.pkcs1").toFile()));
+ }
+
+ static boolean isEpollAvailable() {
+ try {
+ Class<?> epoll = Class.forName("io.netty.channel.epoll.Epoll");
+ return (Boolean) epoll.getMethod("isAvailable").invoke(null);
+ } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
+ return false;
+ }
+ }
+
+ static boolean isKqueueAvailable() {
+ try {
+ Class<?> kqueue = Class.forName("io.netty.channel.kqueue.KQueue");
+ return (Boolean) kqueue.getMethod("isAvailable").invoke(null);
+ } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
+ return false;
+ }
+ }
+
+ static boolean isNativeTransportAvailable() {
+ return isEpollAvailable() || isKqueueAvailable();
+ }
+
+ /**
+ * Assert that the given runnable fails with a Flight exception of the given code.
+ * @param code The expected Flight status code.
+ * @param r The code to run.
+ * @return The thrown status.
+ */
+ public static CallStatus assertCode(FlightStatusCode code, Executable r) {
+ final FlightRuntimeException ex = Assertions.assertThrows(FlightRuntimeException.class, r);
+ Assert.assertEquals(code, ex.status().code());
+ return ex.status();
+ }
+
+ public static class CertKeyPair {
+
+ public final File cert;
+ public final File key;
+
+ public CertKeyPair(File cert, File key) {
+ this.cert = cert;
+ this.key = key;
+ }
+ }
+
+ private FlightTestUtil() {
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestApplicationMetadata.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestApplicationMetadata.java
new file mode 100644
index 000000000..c7b3321af
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestApplicationMetadata.java
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.function.BiConsumer;
+
+import org.apache.arrow.flight.FlightClient.PutListener;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Tests for application-specific metadata support in Flight.
+ */
+public class TestApplicationMetadata {
+
+ // The command used to trigger the test for ARROW-6136.
+ private static final byte[] COMMAND_ARROW_6136 = "ARROW-6136".getBytes();
+ // The expected error message.
+ private static final String MESSAGE_ARROW_6136 = "The stream should not be double-closed.";
+
+ /**
+ * Ensure that a client can read the metadata sent from the server.
+ */
+ @Test
+ // This test is consistently flaky on CI, unfortunately.
+ @Ignore
+ public void retrieveMetadata() {
+ test((allocator, client) -> {
+ try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) {
+ byte i = 0;
+ while (stream.next()) {
+ final IntVector vector = (IntVector) stream.getRoot().getVector("a");
+ Assert.assertEquals(1, vector.getValueCount());
+ Assert.assertEquals(10, vector.get(0));
+ Assert.assertEquals(i, stream.getLatestMetadata().getByte(0));
+ i++;
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ /** ARROW-6136: make sure that the Flight implementation doesn't double-close the server-to-client stream. */
+ @Test
+ public void arrow6136() {
+ final Schema schema = new Schema(Collections.emptyList());
+ test((allocator, client) -> {
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ final FlightDescriptor descriptor = FlightDescriptor.command(COMMAND_ARROW_6136);
+
+ final PutListener listener = new SyncPutListener();
+ final FlightClient.ClientStreamListener writer = client.startPut(descriptor, root, listener);
+ // Must attempt to retrieve the result to get any server-side errors.
+ final CallStatus status = FlightTestUtil.assertCode(FlightStatusCode.INTERNAL, writer::getResult);
+ Assert.assertEquals(MESSAGE_ARROW_6136, status.description());
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ /**
+ * Ensure that a client can send metadata to the server.
+ */
+ @Test
+ @Ignore
+ public void uploadMetadataAsync() {
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true))));
+ test((allocator, client) -> {
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ final FlightDescriptor descriptor = FlightDescriptor.path("test");
+
+ final PutListener listener = new AsyncPutListener() {
+ int counter = 0;
+
+ @Override
+ public void onNext(PutResult val) {
+ Assert.assertNotNull(val);
+ Assert.assertEquals(counter, val.getApplicationMetadata().getByte(0));
+ counter++;
+ }
+ };
+ final FlightClient.ClientStreamListener writer = client.startPut(descriptor, root, listener);
+
+ root.allocateNew();
+ for (byte i = 0; i < 10; i++) {
+ final IntVector vector = (IntVector) root.getVector("a");
+ final ArrowBuf metadata = allocator.buffer(1);
+ metadata.writeByte(i);
+ vector.set(0, 10);
+ vector.setValueCount(1);
+ root.setRowCount(1);
+ writer.putNext(metadata);
+ }
+ writer.completed();
+ // Must attempt to retrieve the result to get any server-side errors.
+ writer.getResult();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ /**
+ * Ensure that a client can send metadata to the server. Uses the synchronous API.
+ */
+ @Test
+ @Ignore
+ public void uploadMetadataSync() {
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true))));
+ test((allocator, client) -> {
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
+ final SyncPutListener listener = new SyncPutListener()) {
+ final FlightDescriptor descriptor = FlightDescriptor.path("test");
+ final FlightClient.ClientStreamListener writer = client.startPut(descriptor, root, listener);
+
+ root.allocateNew();
+ for (byte i = 0; i < 10; i++) {
+ final IntVector vector = (IntVector) root.getVector("a");
+ final ArrowBuf metadata = allocator.buffer(1);
+ metadata.writeByte(i);
+ vector.set(0, 10);
+ vector.setValueCount(1);
+ root.setRowCount(1);
+ writer.putNext(metadata);
+ try (final PutResult message = listener.poll(5000, TimeUnit.SECONDS)) {
+ Assert.assertNotNull(message);
+ Assert.assertEquals(i, message.getApplicationMetadata().getByte(0));
+ } catch (InterruptedException | ExecutionException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ writer.completed();
+ // Must attempt to retrieve the result to get any server-side errors.
+ writer.getResult();
+ }
+ });
+ }
+
+ /**
+ * Make sure that a {@link SyncPutListener} properly reclaims memory if ignored.
+ */
+ @Test
+ @Ignore
+ public void syncMemoryReclaimed() {
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true))));
+ test((allocator, client) -> {
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
+ final SyncPutListener listener = new SyncPutListener()) {
+ final FlightDescriptor descriptor = FlightDescriptor.path("test");
+ final FlightClient.ClientStreamListener writer = client.startPut(descriptor, root, listener);
+
+ root.allocateNew();
+ for (byte i = 0; i < 10; i++) {
+ final IntVector vector = (IntVector) root.getVector("a");
+ final ArrowBuf metadata = allocator.buffer(1);
+ metadata.writeByte(i);
+ vector.set(0, 10);
+ vector.setValueCount(1);
+ root.setRowCount(1);
+ writer.putNext(metadata);
+ }
+ writer.completed();
+ // Must attempt to retrieve the result to get any server-side errors.
+ writer.getResult();
+ }
+ });
+ }
+
+ /**
+ * ARROW-9221: Flight copies metadata from the byte buffer of a Protobuf ByteString,
+ * which is in big-endian by default, thus mangling metadata.
+ */
+ @Test
+ public void testMetadataEndianness() throws Exception {
+ try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ final BufferAllocator serverAllocator = allocator.newChildAllocator("flight-server", 0, Long.MAX_VALUE);
+ final FlightServer server = FlightTestUtil.getStartedServer(
+ (location) -> FlightServer
+ .builder(serverAllocator, location, new EndianFlightProducer(serverAllocator))
+ .build());
+ final FlightClient client = FlightClient.builder(allocator, server.getLocation()).build()) {
+ final Schema schema = new Schema(Collections.emptyList());
+ final FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]);
+ try (final SyncPutListener reader = new SyncPutListener();
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ final FlightClient.ClientStreamListener writer = client.startPut(descriptor, root, reader);
+ writer.completed();
+ try (final PutResult metadata = reader.read()) {
+ Assert.assertEquals(16, metadata.getApplicationMetadata().readableBytes());
+ byte[] bytes = new byte[16];
+ metadata.getApplicationMetadata().readBytes(bytes);
+ Assert.assertArrayEquals(EndianFlightProducer.EXPECTED_BYTES, bytes);
+ }
+ writer.getResult();
+ }
+ }
+ }
+
+ private void test(BiConsumer<BufferAllocator, FlightClient> fun) {
+ try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ final FlightServer s =
+ FlightTestUtil.getStartedServer(
+ (location) -> FlightServer.builder(allocator, location, new MetadataFlightProducer(allocator)).build());
+ final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+ fun.accept(allocator, client);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * A FlightProducer that always produces a fixed data stream with metadata on the side.
+ */
+ private static class MetadataFlightProducer extends NoOpFlightProducer {
+
+ private final BufferAllocator allocator;
+
+ public MetadataFlightProducer(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true))));
+ try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ root.allocateNew();
+ listener.start(root);
+ for (byte i = 0; i < 10; i++) {
+ final IntVector vector = (IntVector) root.getVector("a");
+ vector.set(0, 10);
+ vector.setValueCount(1);
+ root.setRowCount(1);
+ final ArrowBuf metadata = allocator.buffer(1);
+ metadata.writeByte(i);
+ listener.putNext(metadata);
+ }
+ listener.completed();
+ }
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context, FlightStream stream, StreamListener<PutResult> ackStream) {
+ return () -> {
+ // Wait for the descriptor to be sent
+ stream.getRoot();
+ if (stream.getDescriptor().isCommand() &&
+ Arrays.equals(stream.getDescriptor().getCommand(), COMMAND_ARROW_6136)) {
+ // ARROW-6136: Try closing the stream
+ ackStream.onError(
+ CallStatus.INTERNAL.withDescription(MESSAGE_ARROW_6136).toRuntimeException());
+ return;
+ }
+ try {
+ byte current = 0;
+ while (stream.next()) {
+ final ArrowBuf metadata = stream.getLatestMetadata();
+ if (current != metadata.getByte(0)) {
+ ackStream.onError(CallStatus.INVALID_ARGUMENT.withDescription(String
+ .format("Metadata does not match expected value; got %d but expected %d.", metadata.getByte(0),
+ current)).toRuntimeException());
+ return;
+ }
+ ackStream.onNext(PutResult.metadata(metadata));
+ current++;
+ }
+ if (current != 10) {
+ throw CallStatus.INVALID_ARGUMENT.withDescription("Wrong number of messages sent.").toRuntimeException();
+ }
+ } catch (Exception e) {
+ throw CallStatus.INTERNAL.withCause(e).withDescription(e.toString()).toRuntimeException();
+ }
+ };
+ }
+ }
+
+ private static class EndianFlightProducer extends NoOpFlightProducer {
+ static final byte[] EXPECTED_BYTES = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+ private final BufferAllocator allocator;
+
+ private EndianFlightProducer(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context, FlightStream flightStream, StreamListener<PutResult> ackStream) {
+ return () -> {
+ while (flightStream.next()) {
+ // Ignore any data
+ }
+
+ try (final ArrowBuf buf = allocator.buffer(16)) {
+ buf.writeBytes(EXPECTED_BYTES);
+ ackStream.onNext(PutResult.metadata(buf));
+ }
+ ackStream.onCompleted();
+ };
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestAuth.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestAuth.java
new file mode 100644
index 000000000..6f0ec9f02
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestAuth.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Iterator;
+import java.util.Optional;
+
+import org.apache.arrow.flight.auth.ClientAuthHandler;
+import org.apache.arrow.flight.auth.ServerAuthHandler;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Test;
+
+public class TestAuth {
+
+ /** An auth handler that does not send messages should not block the server forever. */
+ @Test(expected = RuntimeException.class)
+ public void noMessages() throws Exception {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final FlightServer s = FlightTestUtil
+ .getStartedServer(
+ location -> FlightServer.builder(allocator, location, new NoOpFlightProducer()).authHandler(
+ new OneshotAuthHandler()).build());
+ final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+ client.authenticate(new ClientAuthHandler() {
+ @Override
+ public void authenticate(ClientAuthSender outgoing, Iterator<byte[]> incoming) {
+ }
+
+ @Override
+ public byte[] getCallToken() {
+ return new byte[0];
+ }
+ });
+ }
+ }
+
+ /** An auth handler that sends an error should not block the server forever. */
+ @Test(expected = RuntimeException.class)
+ public void clientError() throws Exception {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final FlightServer s = FlightTestUtil
+ .getStartedServer(
+ location -> FlightServer.builder(allocator, location, new NoOpFlightProducer()).authHandler(
+ new OneshotAuthHandler()).build());
+ final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+ client.authenticate(new ClientAuthHandler() {
+ @Override
+ public void authenticate(ClientAuthSender outgoing, Iterator<byte[]> incoming) {
+ outgoing.send(new byte[0]);
+ // Ensure the server-side runs
+ incoming.next();
+ outgoing.onError(new RuntimeException("test"));
+ }
+
+ @Override
+ public byte[] getCallToken() {
+ return new byte[0];
+ }
+ });
+ }
+ }
+
+ private static class OneshotAuthHandler implements ServerAuthHandler {
+
+ @Override
+ public Optional<String> isValid(byte[] token) {
+ return Optional.of("test");
+ }
+
+ @Override
+ public boolean authenticate(ServerAuthSender outgoing, Iterator<byte[]> incoming) {
+ incoming.next();
+ outgoing.send(new byte[0]);
+ return false;
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBackPressure.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBackPressure.java
new file mode 100644
index 000000000..1a71c363e
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBackPressure.java
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Function;
+
+import org.apache.arrow.flight.perf.PerformanceTestServer;
+import org.apache.arrow.flight.perf.TestPerf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+public class TestBackPressure {
+
+ private static final int BATCH_SIZE = 4095;
+
+ /**
+ * Make sure that failing to consume one stream doesn't block other streams.
+ */
+ @Ignore
+ @Test
+ public void ensureIndependentSteams() throws Exception {
+ ensureIndependentSteams((b) -> (location -> new PerformanceTestServer(b, location)));
+ }
+
+ /**
+ * Make sure that failing to consume one stream doesn't block other streams.
+ */
+ @Ignore
+ @Test
+ public void ensureIndependentSteamsWithCallbacks() throws Exception {
+ ensureIndependentSteams((b) -> (location -> new PerformanceTestServer(b, location,
+ new BackpressureStrategy.CallbackBackpressureStrategy(), true)));
+ }
+
+ /**
+ * Test to make sure stream doesn't go faster than the consumer is consuming.
+ */
+ @Ignore
+ @Test
+ public void ensureWaitUntilProceed() throws Exception {
+ ensureWaitUntilProceed(new PollingBackpressureStrategy(), false);
+ }
+
+ /**
+ * Test to make sure stream doesn't go faster than the consumer is consuming using a callback-based
+ * backpressure strategy.
+ */
+ @Ignore
+ @Test
+ public void ensureWaitUntilProceedWithCallbacks() throws Exception {
+ ensureWaitUntilProceed(new RecordingCallbackBackpressureStrategy(), true);
+ }
+
+ /**
+ * Make sure that failing to consume one stream doesn't block other streams.
+ */
+ private static void ensureIndependentSteams(Function<BufferAllocator, Function<Location, PerformanceTestServer>>
+ serverConstructor) throws Exception {
+ try (
+ final BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ final PerformanceTestServer server = FlightTestUtil.getStartedServer(
+ (location) -> (serverConstructor.apply(a).apply(location)));
+ final FlightClient client = FlightClient.builder(a, server.getLocation()).build()
+ ) {
+ try (FlightStream fs1 = client.getStream(client.getInfo(
+ TestPerf.getPerfFlightDescriptor(110L * BATCH_SIZE, BATCH_SIZE, 1))
+ .getEndpoints().get(0).getTicket())) {
+ consume(fs1, 10);
+
+ // stop consuming fs1 but make sure we can consume a large amount of fs2.
+ try (FlightStream fs2 = client.getStream(client.getInfo(
+ TestPerf.getPerfFlightDescriptor(200L * BATCH_SIZE, BATCH_SIZE, 1))
+ .getEndpoints().get(0).getTicket())) {
+ consume(fs2, 100);
+
+ consume(fs1, 100);
+ consume(fs2, 100);
+
+ consume(fs1);
+ consume(fs2);
+ }
+ }
+ }
+ }
+
+ /**
+ * Make sure that a stream doesn't go faster than the consumer is consuming.
+ */
+ private static void ensureWaitUntilProceed(SleepTimeRecordingBackpressureStrategy bpStrategy, boolean isNonBlocking)
+ throws Exception {
+ // request some values.
+ final long wait = 3000;
+ final long epsilon = 1000;
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
+
+ final FlightProducer producer = new NoOpFlightProducer() {
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ bpStrategy.register(listener);
+ final Runnable loadData = () -> {
+ int batches = 0;
+ final Schema pojoSchema = new Schema(ImmutableList.of(Field.nullable("a", MinorType.BIGINT.getType())));
+ try (VectorSchemaRoot root = VectorSchemaRoot.create(pojoSchema, allocator)) {
+ listener.start(root);
+ while (true) {
+ bpStrategy.waitForListener(0);
+ if (batches > 100) {
+ root.clear();
+ listener.completed();
+ return;
+ }
+
+ root.allocateNew();
+ root.setRowCount(4095);
+ listener.putNext();
+ batches++;
+ }
+ }
+ };
+
+ if (!isNonBlocking) {
+ loadData.run();
+ } else {
+ final ExecutorService service = Executors.newSingleThreadExecutor();
+ service.submit(loadData);
+ service.shutdown();
+ }
+ }
+ };
+
+
+ try (
+ BufferAllocator serverAllocator = allocator.newChildAllocator("server", 0, Long.MAX_VALUE);
+ FlightServer server =
+ FlightTestUtil.getStartedServer((location) -> FlightServer.builder(serverAllocator, location, producer)
+ .build());
+ BufferAllocator clientAllocator = allocator.newChildAllocator("client", 0, Long.MAX_VALUE);
+ FlightClient client =
+ FlightClient
+ .builder(clientAllocator, server.getLocation())
+ .build();
+ FlightStream stream = client.getStream(new Ticket(new byte[1]))
+ ) {
+ VectorSchemaRoot root = stream.getRoot();
+ root.clear();
+ Thread.sleep(wait);
+ while (stream.next()) {
+ root.clear();
+ }
+ long expected = wait - epsilon;
+ Assert.assertTrue(
+ String.format("Expected a sleep of at least %dms but only slept for %d", expected,
+ bpStrategy.getSleepTime()), bpStrategy.getSleepTime() > expected);
+
+ }
+ }
+ }
+
+ private static void consume(FlightStream stream) {
+ VectorSchemaRoot root = stream.getRoot();
+ while (stream.next()) {
+ root.clear();
+ }
+ }
+
+ private static void consume(FlightStream stream, int batches) {
+ VectorSchemaRoot root = stream.getRoot();
+ while (batches > 0 && stream.next()) {
+ root.clear();
+ batches--;
+ }
+ }
+
+ private interface SleepTimeRecordingBackpressureStrategy extends BackpressureStrategy {
+ /**
+ * Returns the total time spent waiting on the listener to be ready.
+ * @return the total time spent waiting on the listener to be ready.
+ */
+ long getSleepTime();
+ }
+
+ /**
+ * Implementation of a backpressure strategy that polls on isReady and records amount of time spent in Thread.sleep().
+ */
+ private static class PollingBackpressureStrategy implements SleepTimeRecordingBackpressureStrategy {
+ private final AtomicLong sleepTime = new AtomicLong(0);
+ private FlightProducer.ServerStreamListener listener;
+
+ @Override
+ public long getSleepTime() {
+ return sleepTime.get();
+ }
+
+ @Override
+ public void register(FlightProducer.ServerStreamListener listener) {
+ this.listener = listener;
+ }
+
+ @Override
+ public WaitResult waitForListener(long timeout) {
+ while (!listener.isReady()) {
+ try {
+ Thread.sleep(1);
+ sleepTime.addAndGet(1L);
+ } catch (InterruptedException ignore) {
+ }
+ }
+ return WaitResult.READY;
+ }
+ }
+
+ /**
+ * Implementation of a backpressure strategy that uses callbacks to detect changes in client readiness state
+ * and records spent time waiting.
+ */
+ private static class RecordingCallbackBackpressureStrategy extends BackpressureStrategy.CallbackBackpressureStrategy
+ implements SleepTimeRecordingBackpressureStrategy {
+ private final AtomicLong sleepTime = new AtomicLong(0);
+
+ @Override
+ public long getSleepTime() {
+ return sleepTime.get();
+ }
+
+ @Override
+ public WaitResult waitForListener(long timeout) {
+ final long startTime = System.currentTimeMillis();
+ final WaitResult result = super.waitForListener(timeout);
+ sleepTime.addAndGet(System.currentTimeMillis() - startTime);
+ return result;
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBasicOperation.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBasicOperation.java
new file mode 100644
index 000000000..e29cd07ce
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBasicOperation.java
@@ -0,0 +1,567 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.FlightClient.ClientStreamListener;
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.flight.impl.Flight.FlightDescriptor.DescriptorType;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+import com.google.protobuf.ByteString;
+
+import io.grpc.MethodDescriptor;
+
+/**
+ * Test the operations of a basic flight service.
+ */
+public class TestBasicOperation {
+
+ @Test
+ public void fastPathDefaults() {
+ Assert.assertTrue(ArrowMessage.ENABLE_ZERO_COPY_READ);
+ Assert.assertFalse(ArrowMessage.ENABLE_ZERO_COPY_WRITE);
+ }
+
+ /**
+ * ARROW-6017: we should be able to construct locations for unknown schemes.
+ */
+ @Test
+ public void unknownScheme() throws URISyntaxException {
+ final Location location = new Location("s3://unknown");
+ Assert.assertEquals("s3", location.getUri().getScheme());
+ }
+
+ @Test
+ public void unknownSchemeRemote() throws Exception {
+ test(c -> {
+ try {
+ final FlightInfo info = c.getInfo(FlightDescriptor.path("test"));
+ Assert.assertEquals(new URI("https://example.com"), info.getEndpoints().get(0).getLocations().get(0).getUri());
+ } catch (URISyntaxException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ @Test
+ public void roundTripTicket() throws Exception {
+ final Ticket ticket = new Ticket(new byte[]{0, 1, 2, 3, 4, 5});
+ Assert.assertEquals(ticket, Ticket.deserialize(ticket.serialize()));
+ }
+
+ @Test
+ public void roundTripInfo() throws Exception {
+ final Map<String, String> metadata = new HashMap<>();
+ metadata.put("foo", "bar");
+ final Schema schema = new Schema(Arrays.asList(
+ Field.nullable("a", new ArrowType.Int(32, true)),
+ Field.nullable("b", new ArrowType.FixedSizeBinary(32))
+ ), metadata);
+ final FlightInfo info1 = new FlightInfo(schema, FlightDescriptor.path(), Collections.emptyList(), -1, -1);
+ final FlightInfo info2 = new FlightInfo(schema, FlightDescriptor.command(new byte[2]),
+ Collections.singletonList(new FlightEndpoint(
+ new Ticket(new byte[10]), Location.forGrpcDomainSocket("/tmp/test.sock"))), 200, 500);
+ final FlightInfo info3 = new FlightInfo(schema, FlightDescriptor.path("a", "b"),
+ Arrays.asList(new FlightEndpoint(
+ new Ticket(new byte[10]), Location.forGrpcDomainSocket("/tmp/test.sock")),
+ new FlightEndpoint(
+ new Ticket(new byte[10]), Location.forGrpcDomainSocket("/tmp/test.sock"),
+ Location.forGrpcInsecure("localhost", 50051))
+ ), 200, 500);
+
+ Assert.assertEquals(info1, FlightInfo.deserialize(info1.serialize()));
+ Assert.assertEquals(info2, FlightInfo.deserialize(info2.serialize()));
+ Assert.assertEquals(info3, FlightInfo.deserialize(info3.serialize()));
+ }
+
+ @Test
+ public void roundTripDescriptor() throws Exception {
+ final FlightDescriptor cmd = FlightDescriptor.command("test command".getBytes(StandardCharsets.UTF_8));
+ Assert.assertEquals(cmd, FlightDescriptor.deserialize(cmd.serialize()));
+ final FlightDescriptor path = FlightDescriptor.path("foo", "bar", "test.arrow");
+ Assert.assertEquals(path, FlightDescriptor.deserialize(path.serialize()));
+ }
+
+ @Test
+ public void getDescriptors() throws Exception {
+ test(c -> {
+ int count = 0;
+ for (FlightInfo i : c.listFlights(Criteria.ALL)) {
+ count += 1;
+ }
+ Assert.assertEquals(1, count);
+ });
+ }
+
+ @Test
+ public void getDescriptorsWithCriteria() throws Exception {
+ test(c -> {
+ int count = 0;
+ for (FlightInfo i : c.listFlights(new Criteria(new byte[]{1}))) {
+ count += 1;
+ }
+ Assert.assertEquals(0, count);
+ });
+ }
+
+ @Test
+ public void getDescriptor() throws Exception {
+ test(c -> {
+ System.out.println(c.getInfo(FlightDescriptor.path("hello")).getDescriptor());
+ });
+ }
+
+ @Test
+ public void getSchema() throws Exception {
+ test(c -> {
+ System.out.println(c.getSchema(FlightDescriptor.path("hello")).getSchema());
+ });
+ }
+
+
+ @Test
+ public void listActions() throws Exception {
+ test(c -> {
+ for (ActionType at : c.listActions()) {
+ System.out.println(at.getType());
+ }
+ });
+ }
+
+ @Test
+ public void doAction() throws Exception {
+ test(c -> {
+ Iterator<Result> stream = c.doAction(new Action("hello"));
+
+ Assert.assertTrue(stream.hasNext());
+ Result r = stream.next();
+ Assert.assertArrayEquals("world".getBytes(Charsets.UTF_8), r.getBody());
+ });
+ test(c -> {
+ Iterator<Result> stream = c.doAction(new Action("hellooo"));
+
+ Assert.assertTrue(stream.hasNext());
+ Result r = stream.next();
+ Assert.assertArrayEquals("world".getBytes(Charsets.UTF_8), r.getBody());
+
+ Assert.assertTrue(stream.hasNext());
+ r = stream.next();
+ Assert.assertArrayEquals("!".getBytes(Charsets.UTF_8), r.getBody());
+ Assert.assertFalse(stream.hasNext());
+ });
+ }
+
+ @Test
+ public void putStream() throws Exception {
+ test((c, a) -> {
+ final int size = 10;
+
+ IntVector iv = new IntVector("c1", a);
+
+ try (VectorSchemaRoot root = VectorSchemaRoot.of(iv)) {
+ ClientStreamListener listener = c
+ .startPut(FlightDescriptor.path("hello"), root, new AsyncPutListener());
+
+ //batch 1
+ root.allocateNew();
+ for (int i = 0; i < size; i++) {
+ iv.set(i, i);
+ }
+ iv.setValueCount(size);
+ root.setRowCount(size);
+ listener.putNext();
+
+ // batch 2
+
+ root.allocateNew();
+ for (int i = 0; i < size; i++) {
+ iv.set(i, i + size);
+ }
+ iv.setValueCount(size);
+ root.setRowCount(size);
+ listener.putNext();
+ root.clear();
+ listener.completed();
+
+ // wait for ack to avoid memory leaks.
+ listener.getResult();
+ }
+ });
+ }
+
+ @Test
+ public void propagateErrors() throws Exception {
+ test(client -> {
+ FlightTestUtil.assertCode(FlightStatusCode.UNIMPLEMENTED, () -> {
+ client.doAction(new Action("invalid-action")).forEachRemaining(action -> Assert.fail());
+ });
+ });
+ }
+
+ @Test
+ public void getStream() throws Exception {
+ test(c -> {
+ try (final FlightStream stream = c.getStream(new Ticket(new byte[0]))) {
+ VectorSchemaRoot root = stream.getRoot();
+ IntVector iv = (IntVector) root.getVector("c1");
+ int value = 0;
+ while (stream.next()) {
+ for (int i = 0; i < root.getRowCount(); i++) {
+ Assert.assertEquals(value, iv.get(i));
+ value++;
+ }
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ /** Ensure the client is configured to accept large messages. */
+ @Test
+ public void getStreamLargeBatch() throws Exception {
+ test(c -> {
+ try (final FlightStream stream = c.getStream(new Ticket(Producer.TICKET_LARGE_BATCH))) {
+ Assert.assertEquals(128, stream.getRoot().getFieldVectors().size());
+ Assert.assertTrue(stream.next());
+ Assert.assertEquals(65536, stream.getRoot().getRowCount());
+ Assert.assertTrue(stream.next());
+ Assert.assertEquals(65536, stream.getRoot().getRowCount());
+ Assert.assertFalse(stream.next());
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ /** Ensure the server is configured to accept large messages. */
+ @Test
+ public void startPutLargeBatch() throws Exception {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) {
+ final List<FieldVector> vectors = new ArrayList<>();
+ for (int col = 0; col < 128; col++) {
+ final BigIntVector vector = new BigIntVector("f" + col, allocator);
+ for (int row = 0; row < 65536; row++) {
+ vector.setSafe(row, row);
+ }
+ vectors.add(vector);
+ }
+ test(c -> {
+ try (final VectorSchemaRoot root = new VectorSchemaRoot(vectors)) {
+ root.setRowCount(65536);
+ final ClientStreamListener stream = c.startPut(FlightDescriptor.path(""), root, new SyncPutListener());
+ stream.putNext();
+ stream.putNext();
+ stream.completed();
+ stream.getResult();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+ }
+
+ private void test(Consumer<FlightClient> consumer) throws Exception {
+ test((c, a) -> {
+ consumer.accept(c);
+ });
+ }
+
+ private void test(BiConsumer<FlightClient, BufferAllocator> consumer) throws Exception {
+ try (
+ BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ Producer producer = new Producer(a);
+ FlightServer s =
+ FlightTestUtil.getStartedServer(
+ (location) -> FlightServer.builder(a, location, producer).build()
+ )) {
+
+ try (
+ FlightClient c = FlightClient.builder(a, s.getLocation()).build()
+ ) {
+ try (BufferAllocator testAllocator = a.newChildAllocator("testcase", 0, Long.MAX_VALUE)) {
+ consumer.accept(c, testAllocator);
+ }
+ }
+ }
+ }
+
+ /** Helper method to convert an ArrowMessage into a Protobuf message. */
+ private Flight.FlightData arrowMessageToProtobuf(
+ MethodDescriptor.Marshaller<ArrowMessage> marshaller, ArrowMessage message) throws IOException {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (final InputStream serialized = marshaller.stream(message)) {
+ final byte[] buf = new byte[1024];
+ while (true) {
+ int read = serialized.read(buf);
+ if (read < 0) {
+ break;
+ }
+ baos.write(buf, 0, read);
+ }
+ }
+ final byte[] serializedMessage = baos.toByteArray();
+ return Flight.FlightData.parseFrom(serializedMessage);
+ }
+
+ /** ARROW-10962: accept FlightData messages generated by Protobuf (which can omit empty fields). */
+ @Test
+ public void testProtobufRecordBatchCompatibility() throws Exception {
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("foo", new ArrowType.Int(32, true))));
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ final VectorUnloader unloader = new VectorUnloader(root);
+ root.setRowCount(0);
+ final MethodDescriptor.Marshaller<ArrowMessage> marshaller = ArrowMessage.createMarshaller(allocator);
+ try (final ArrowMessage message = new ArrowMessage(
+ unloader.getRecordBatch(), /* appMetadata */ null, /* tryZeroCopy */ false, IpcOption.DEFAULT)) {
+ Assert.assertEquals(ArrowMessage.HeaderType.RECORD_BATCH, message.getMessageType());
+ // Should have at least one empty body buffer (there may be multiple for e.g. data and validity)
+ Iterator<ArrowBuf> iterator = message.getBufs().iterator();
+ Assert.assertTrue(iterator.hasNext());
+ while (iterator.hasNext()) {
+ Assert.assertEquals(0, iterator.next().capacity());
+ }
+ final Flight.FlightData protobufData = arrowMessageToProtobuf(marshaller, message)
+ .toBuilder()
+ .clearDataBody()
+ .build();
+ Assert.assertEquals(0, protobufData.getDataBody().size());
+ ArrowMessage parsedMessage = marshaller.parse(new ByteArrayInputStream(protobufData.toByteArray()));
+ // Should have an empty body buffer
+ Iterator<ArrowBuf> parsedIterator = parsedMessage.getBufs().iterator();
+ Assert.assertTrue(parsedIterator.hasNext());
+ Assert.assertEquals(0, parsedIterator.next().capacity());
+ // Should have only one (the parser synthesizes exactly one); in the case of empty buffers, this is equivalent
+ Assert.assertFalse(parsedIterator.hasNext());
+ // Should not throw
+ final ArrowRecordBatch rb = parsedMessage.asRecordBatch();
+ Assert.assertEquals(rb.computeBodyLength(), 0);
+ }
+ }
+ }
+
+ /** ARROW-10962: accept FlightData messages generated by Protobuf (which can omit empty fields). */
+ @Test
+ public void testProtobufSchemaCompatibility() throws Exception {
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("foo", new ArrowType.Int(32, true))));
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) {
+ final MethodDescriptor.Marshaller<ArrowMessage> marshaller = ArrowMessage.createMarshaller(allocator);
+ Flight.FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]).toProtocol();
+ try (final ArrowMessage message = new ArrowMessage(descriptor, schema, IpcOption.DEFAULT)) {
+ Assert.assertEquals(ArrowMessage.HeaderType.SCHEMA, message.getMessageType());
+ // Should have no body buffers
+ Assert.assertFalse(message.getBufs().iterator().hasNext());
+ final Flight.FlightData protobufData = arrowMessageToProtobuf(marshaller, message)
+ .toBuilder()
+ .setDataBody(ByteString.EMPTY)
+ .build();
+ Assert.assertEquals(0, protobufData.getDataBody().size());
+ final ArrowMessage parsedMessage = marshaller.parse(new ByteArrayInputStream(protobufData.toByteArray()));
+ // Should have no body buffers
+ Assert.assertFalse(parsedMessage.getBufs().iterator().hasNext());
+ // Should not throw
+ parsedMessage.asSchema();
+ }
+ }
+ }
+
+ /**
+ * An example FlightProducer for test purposes.
+ */
+ public static class Producer implements FlightProducer, AutoCloseable {
+ static final byte[] TICKET_LARGE_BATCH = "large-batch".getBytes(StandardCharsets.UTF_8);
+
+ private final BufferAllocator allocator;
+
+ public Producer(BufferAllocator allocator) {
+ super();
+ this.allocator = allocator;
+ }
+
+ @Override
+ public void listFlights(CallContext context, Criteria criteria,
+ StreamListener<FlightInfo> listener) {
+ if (criteria.getExpression().length > 0) {
+ // Don't send anything if criteria are set
+ listener.onCompleted();
+ }
+
+ Flight.FlightInfo getInfo = Flight.FlightInfo.newBuilder()
+ .setFlightDescriptor(Flight.FlightDescriptor.newBuilder()
+ .setType(DescriptorType.CMD)
+ .setCmd(ByteString.copyFrom("cool thing", Charsets.UTF_8)))
+ .build();
+ try {
+ listener.onNext(new FlightInfo(getInfo));
+ } catch (URISyntaxException e) {
+ listener.onError(e);
+ return;
+ }
+ listener.onCompleted();
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context, FlightStream flightStream, StreamListener<PutResult> ackStream) {
+ return () -> {
+ while (flightStream.next()) {
+ // Drain the stream
+ }
+ };
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ if (Arrays.equals(TICKET_LARGE_BATCH, ticket.getBytes())) {
+ getLargeBatch(listener);
+ return;
+ }
+ final int size = 10;
+
+ IntVector iv = new IntVector("c1", allocator);
+ VectorSchemaRoot root = VectorSchemaRoot.of(iv);
+ listener.start(root);
+
+ //batch 1
+ root.allocateNew();
+ for (int i = 0; i < size; i++) {
+ iv.set(i, i);
+ }
+ iv.setValueCount(size);
+ root.setRowCount(size);
+ listener.putNext();
+
+ // batch 2
+
+ root.allocateNew();
+ for (int i = 0; i < size; i++) {
+ iv.set(i, i + size);
+ }
+ iv.setValueCount(size);
+ root.setRowCount(size);
+ listener.putNext();
+ root.clear();
+ listener.completed();
+ }
+
+ private void getLargeBatch(ServerStreamListener listener) {
+ final List<FieldVector> vectors = new ArrayList<>();
+ for (int col = 0; col < 128; col++) {
+ final BigIntVector vector = new BigIntVector("f" + col, allocator);
+ for (int row = 0; row < 65536; row++) {
+ vector.setSafe(row, row);
+ }
+ vectors.add(vector);
+ }
+ try (final VectorSchemaRoot root = new VectorSchemaRoot(vectors)) {
+ root.setRowCount(65536);
+ listener.start(root);
+ listener.putNext();
+ listener.putNext();
+ listener.completed();
+ }
+ }
+
+ @Override
+ public void close() throws Exception {
+ allocator.close();
+ }
+
+ @Override
+ public FlightInfo getFlightInfo(CallContext context,
+ FlightDescriptor descriptor) {
+ try {
+ Flight.FlightInfo getInfo = Flight.FlightInfo.newBuilder()
+ .setFlightDescriptor(Flight.FlightDescriptor.newBuilder()
+ .setType(DescriptorType.CMD)
+ .setCmd(ByteString.copyFrom("cool thing", Charsets.UTF_8)))
+ .addEndpoint(
+ Flight.FlightEndpoint.newBuilder().addLocation(new Location("https://example.com").toProtocol()))
+ .build();
+ return new FlightInfo(getInfo);
+ } catch (URISyntaxException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public void doAction(CallContext context, Action action,
+ StreamListener<Result> listener) {
+ switch (action.getType()) {
+ case "hello": {
+ listener.onNext(new Result("world".getBytes(Charsets.UTF_8)));
+ listener.onCompleted();
+ break;
+ }
+ case "hellooo": {
+ listener.onNext(new Result("world".getBytes(Charsets.UTF_8)));
+ listener.onNext(new Result("!".getBytes(Charsets.UTF_8)));
+ listener.onCompleted();
+ break;
+ }
+ default:
+ listener.onError(CallStatus.UNIMPLEMENTED.withDescription("Action not implemented: " + action.getType())
+ .toRuntimeException());
+ }
+ }
+
+ @Override
+ public void listActions(CallContext context,
+ StreamListener<ActionType> listener) {
+ listener.onNext(new ActionType("get", ""));
+ listener.onNext(new ActionType("put", ""));
+ listener.onNext(new ActionType("hello", ""));
+ listener.onCompleted();
+ }
+
+ }
+
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestCallOptions.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestCallOptions.java
new file mode 100644
index 000000000..45e3e4960
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestCallOptions.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Iterator;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import io.grpc.Metadata;
+
+public class TestCallOptions {
+
+ @Test
+ @Ignore
+ public void timeoutFires() {
+ // Ignored due to CI flakiness
+ test((client) -> {
+ Instant start = Instant.now();
+ Iterator<Result> results = client.doAction(new Action("hang"), CallOptions.timeout(1, TimeUnit.SECONDS));
+ try {
+ results.next();
+ Assert.fail("Call should have failed");
+ } catch (RuntimeException e) {
+ Assert.assertTrue(e.getMessage(), e.getMessage().contains("deadline exceeded"));
+ }
+ Instant end = Instant.now();
+ Assert.assertTrue("Call took over 1500 ms despite timeout", Duration.between(start, end).toMillis() < 1500);
+ });
+ }
+
+ @Test
+ @Ignore
+ public void underTimeout() {
+ // Ignored due to CI flakiness
+ test((client) -> {
+ Instant start = Instant.now();
+ // This shouldn't fail and it should complete within the timeout
+ Iterator<Result> results = client.doAction(new Action("fast"), CallOptions.timeout(2, TimeUnit.SECONDS));
+ Assert.assertArrayEquals(new byte[]{42, 42}, results.next().getBody());
+ Instant end = Instant.now();
+ Assert.assertTrue("Call took over 2500 ms despite timeout", Duration.between(start, end).toMillis() < 2500);
+ });
+ }
+
+ @Test
+ public void singleProperty() {
+ final FlightCallHeaders headers = new FlightCallHeaders();
+ headers.insert("key", "value");
+ testHeaders(headers);
+ }
+
+ @Test
+ public void multipleProperties() {
+ final FlightCallHeaders headers = new FlightCallHeaders();
+ headers.insert("key", "value");
+ headers.insert("key2", "value2");
+ testHeaders(headers);
+ }
+
+ @Test
+ public void binaryProperties() {
+ final FlightCallHeaders headers = new FlightCallHeaders();
+ headers.insert("key-bin", "value".getBytes());
+ headers.insert("key3-bin", "ëfßæ".getBytes());
+ testHeaders(headers);
+ }
+
+ @Test
+ public void mixedProperties() {
+ final FlightCallHeaders headers = new FlightCallHeaders();
+ headers.insert("key", "value");
+ headers.insert("key3-bin", "ëfßæ".getBytes());
+ testHeaders(headers);
+ }
+
+ private void testHeaders(CallHeaders headers) {
+ try (
+ BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ HeaderProducer producer = new HeaderProducer();
+ FlightServer s =
+ FlightTestUtil.getStartedServer((location) -> FlightServer.builder(a, location, producer).build());
+ FlightClient client = FlightClient.builder(a, s.getLocation()).build()) {
+ client.doAction(new Action(""), new HeaderCallOption(headers)).hasNext();
+
+ final CallHeaders incomingHeaders = producer.headers();
+ for (String key : headers.keys()) {
+ if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) {
+ Assert.assertArrayEquals(headers.getByte(key), incomingHeaders.getByte(key));
+ } else {
+ Assert.assertEquals(headers.get(key), incomingHeaders.get(key));
+ }
+ }
+ } catch (InterruptedException | IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ void test(Consumer<FlightClient> testFn) {
+ try (
+ BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ Producer producer = new Producer();
+ FlightServer s =
+ FlightTestUtil.getStartedServer((location) -> FlightServer.builder(a, location, producer).build());
+ FlightClient client = FlightClient.builder(a, s.getLocation()).build()) {
+ testFn.accept(client);
+ } catch (InterruptedException | IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ static class HeaderProducer extends NoOpFlightProducer implements AutoCloseable {
+ CallHeaders headers;
+
+ @Override
+ public void close() {
+ }
+
+ public CallHeaders headers() {
+ return headers;
+ }
+
+ @Override
+ public void doAction(CallContext context, Action action, StreamListener<Result> listener) {
+ this.headers = context.getMiddleware(FlightConstants.HEADER_KEY).headers();
+ listener.onCompleted();
+ }
+ }
+
+ static class Producer extends NoOpFlightProducer implements AutoCloseable {
+
+ Producer() {
+ }
+
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public void doAction(CallContext context, Action action, StreamListener<Result> listener) {
+ switch (action.getType()) {
+ case "hang": {
+ try {
+ Thread.sleep(25000);
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ listener.onNext(new Result(new byte[]{}));
+ listener.onCompleted();
+ return;
+ }
+ case "fast": {
+ try {
+ Thread.sleep(500);
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ listener.onNext(new Result(new byte[]{42, 42}));
+ listener.onCompleted();
+ return;
+ }
+ default: {
+ throw new UnsupportedOperationException(action.getType());
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java
new file mode 100644
index 000000000..ccfc9f2d1
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java
@@ -0,0 +1,359 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiConsumer;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * A basic test of client middleware using a simplified OpenTracing-like example.
+ */
+@RunWith(JUnit4.class)
+public class TestClientMiddleware {
+
+ /**
+ * Test that a client middleware can fail a call before it starts by throwing a {@link FlightRuntimeException}.
+ */
+ @Test
+ public void clientMiddleware_failCallBeforeSending() {
+ test(new NoOpFlightProducer(), null, Collections.singletonList(new CallRejector.Factory()),
+ (allocator, client) -> {
+ FlightTestUtil.assertCode(FlightStatusCode.UNAVAILABLE, client::listActions);
+ });
+ }
+
+ /**
+ * Test an OpenTracing-like scenario where client and server middleware work together to propagate a request ID
+ * without explicit intervention from the service implementation.
+ */
+ @Test
+ public void middleware_propagateHeader() {
+ final Context context = new Context("span id");
+ test(new NoOpFlightProducer(),
+ new TestServerMiddleware.ServerMiddlewarePair<>(
+ FlightServerMiddleware.Key.of("test"), new ServerSpanInjector.Factory()),
+ Collections.singletonList(new ClientSpanInjector.Factory(context)),
+ (allocator, client) -> {
+ FlightTestUtil.assertCode(FlightStatusCode.UNIMPLEMENTED, () -> client.listActions().forEach(actionType -> {
+ }));
+ });
+ Assert.assertEquals(context.outgoingSpanId, context.incomingSpanId);
+ Assert.assertNotNull(context.finalStatus);
+ Assert.assertEquals(FlightStatusCode.UNIMPLEMENTED, context.finalStatus.code());
+ }
+
+ /** Ensure both server and client can send and receive multi-valued headers (both binary and text values). */
+ @Test
+ public void testMultiValuedHeaders() {
+ final MultiHeaderClientMiddlewareFactory clientFactory = new MultiHeaderClientMiddlewareFactory();
+ test(new NoOpFlightProducer(),
+ new TestServerMiddleware.ServerMiddlewarePair<>(
+ FlightServerMiddleware.Key.of("test"), new MultiHeaderServerMiddlewareFactory()),
+ Collections.singletonList(clientFactory),
+ (allocator, client) -> {
+ FlightTestUtil.assertCode(FlightStatusCode.UNIMPLEMENTED, () -> client.listActions().forEach(actionType -> {
+ }));
+ });
+ // The server echoes the headers we send back to us, so ensure all the ones we sent are present with the correct
+ // values in the correct order.
+ for (final Map.Entry<String, List<byte[]>> entry : EXPECTED_BINARY_HEADERS.entrySet()) {
+ // Compare header values entry-by-entry because byte arrays don't compare via equals
+ final List<byte[]> receivedValues = clientFactory.lastBinaryHeaders.get(entry.getKey());
+ Assert.assertNotNull("Missing for header: " + entry.getKey(), receivedValues);
+ Assert.assertEquals(
+ "Missing or wrong value for header: " + entry.getKey(),
+ entry.getValue().size(), receivedValues.size());
+ for (int i = 0; i < entry.getValue().size(); i++) {
+ Assert.assertArrayEquals(entry.getValue().get(i), receivedValues.get(i));
+ }
+ }
+ for (final Map.Entry<String, List<String>> entry : EXPECTED_TEXT_HEADERS.entrySet()) {
+ Assert.assertEquals(
+ "Missing or wrong value for header: " + entry.getKey(),
+ entry.getValue(), clientFactory.lastTextHeaders.get(entry.getKey()));
+ }
+ }
+
+ private static <T extends FlightServerMiddleware> void test(FlightProducer producer,
+ TestServerMiddleware.ServerMiddlewarePair<T> serverMiddleware,
+ List<FlightClientMiddleware.Factory> clientMiddleware,
+ BiConsumer<BufferAllocator, FlightClient> body) {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) {
+ final FlightServer server = FlightTestUtil
+ .getStartedServer(location -> {
+ final FlightServer.Builder builder = FlightServer.builder(allocator, location, producer);
+ if (serverMiddleware != null) {
+ builder.middleware(serverMiddleware.key, serverMiddleware.factory);
+ }
+ return builder.build();
+ });
+ FlightClient.Builder builder = FlightClient.builder(allocator, server.getLocation());
+ clientMiddleware.forEach(builder::intercept);
+ try (final FlightServer ignored = server;
+ final FlightClient client = builder.build()
+ ) {
+ body.accept(allocator, client);
+ }
+ } catch (InterruptedException | IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * A server middleware component that reads a request ID from incoming headers and sends the request ID back on
+ * outgoing headers.
+ */
+ static class ServerSpanInjector implements FlightServerMiddleware {
+
+ private final String spanId;
+
+ public ServerSpanInjector(String spanId) {
+ this.spanId = spanId;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ outgoingHeaders.insert("x-span", spanId);
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+
+ }
+
+ @Override
+ public void onCallErrored(Throwable err) {
+
+ }
+
+ static class Factory implements FlightServerMiddleware.Factory<ServerSpanInjector> {
+
+ @Override
+ public ServerSpanInjector onCallStarted(CallInfo info, CallHeaders incomingHeaders, RequestContext context) {
+ return new ServerSpanInjector(incomingHeaders.get("x-span"));
+ }
+ }
+ }
+
+ /**
+ * A client middleware component that, given a mock OpenTracing-like "request context", sends the request ID in the
+ * context on outgoing headers and reads it from incoming headers.
+ */
+ static class ClientSpanInjector implements FlightClientMiddleware {
+
+ private final Context context;
+
+ public ClientSpanInjector(Context context) {
+ this.context = context;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ outgoingHeaders.insert("x-span", context.outgoingSpanId);
+ }
+
+ @Override
+ public void onHeadersReceived(CallHeaders incomingHeaders) {
+ context.incomingSpanId = incomingHeaders.get("x-span");
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+ context.finalStatus = status;
+ }
+
+ static class Factory implements FlightClientMiddleware.Factory {
+
+ private final Context context;
+
+ Factory(Context context) {
+ this.context = context;
+ }
+
+ @Override
+ public FlightClientMiddleware onCallStarted(CallInfo info) {
+ return new ClientSpanInjector(context);
+ }
+ }
+ }
+
+ /**
+ * A mock OpenTracing-like "request context".
+ */
+ static class Context {
+
+ final String outgoingSpanId;
+ String incomingSpanId;
+ CallStatus finalStatus;
+
+ Context(String spanId) {
+ this.outgoingSpanId = spanId;
+ }
+ }
+
+ /**
+ * A client middleware that fails outgoing calls.
+ */
+ static class CallRejector implements FlightClientMiddleware {
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ }
+
+ @Override
+ public void onHeadersReceived(CallHeaders incomingHeaders) {
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+ }
+
+ static class Factory implements FlightClientMiddleware.Factory {
+
+ @Override
+ public FlightClientMiddleware onCallStarted(CallInfo info) {
+ throw CallStatus.UNAVAILABLE.withDescription("Rejecting call.").toRuntimeException();
+ }
+ }
+ }
+
+ // Used to test that middleware can send and receive multi-valued text and binary headers.
+ static final Map<String, List<byte[]>> EXPECTED_BINARY_HEADERS = new HashMap<String, List<byte[]>>() {{
+ put("x-binary-bin", Arrays.asList(new byte[] {0}, new byte[]{1}));
+ }};
+ static final Map<String, List<String>> EXPECTED_TEXT_HEADERS = new HashMap<String, List<String>>() {{
+ put("x-text", Arrays.asList("foo", "bar"));
+ }};
+
+ static class MultiHeaderServerMiddlewareFactory implements
+ FlightServerMiddleware.Factory<MultiHeaderServerMiddleware> {
+ @Override
+ public MultiHeaderServerMiddleware onCallStarted(CallInfo info, CallHeaders incomingHeaders,
+ RequestContext context) {
+ // Echo the headers back to the client. Copy values out of CallHeaders since the underlying gRPC metadata
+ // object isn't safe to use after this function returns.
+ Map<String, List<byte[]>> binaryHeaders = new HashMap<>();
+ Map<String, List<String>> textHeaders = new HashMap<>();
+ for (final String key : incomingHeaders.keys()) {
+ if (key.endsWith("-bin")) {
+ binaryHeaders.compute(key, (ignored, values) -> {
+ if (values == null) {
+ values = new ArrayList<>();
+ }
+ incomingHeaders.getAllByte(key).forEach(values::add);
+ return values;
+ });
+ } else {
+ textHeaders.compute(key, (ignored, values) -> {
+ if (values == null) {
+ values = new ArrayList<>();
+ }
+ incomingHeaders.getAll(key).forEach(values::add);
+ return values;
+ });
+ }
+ }
+ return new MultiHeaderServerMiddleware(binaryHeaders, textHeaders);
+ }
+ }
+
+ static class MultiHeaderServerMiddleware implements FlightServerMiddleware {
+ private final Map<String, List<byte[]>> binaryHeaders;
+ private final Map<String, List<String>> textHeaders;
+
+ MultiHeaderServerMiddleware(Map<String, List<byte[]>> binaryHeaders, Map<String, List<String>> textHeaders) {
+ this.binaryHeaders = binaryHeaders;
+ this.textHeaders = textHeaders;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ binaryHeaders.forEach((key, values) -> values.forEach(value -> outgoingHeaders.insert(key, value)));
+ textHeaders.forEach((key, values) -> values.forEach(value -> outgoingHeaders.insert(key, value)));
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {}
+
+ @Override
+ public void onCallErrored(Throwable err) {}
+ }
+
+ static class MultiHeaderClientMiddlewareFactory implements FlightClientMiddleware.Factory {
+ Map<String, List<byte[]>> lastBinaryHeaders = null;
+ Map<String, List<String>> lastTextHeaders = null;
+
+ @Override
+ public FlightClientMiddleware onCallStarted(CallInfo info) {
+ return new MultiHeaderClientMiddleware(this);
+ }
+ }
+
+ static class MultiHeaderClientMiddleware implements FlightClientMiddleware {
+ private final MultiHeaderClientMiddlewareFactory factory;
+
+ public MultiHeaderClientMiddleware(MultiHeaderClientMiddlewareFactory factory) {
+ this.factory = factory;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ for (final Map.Entry<String, List<byte[]>> entry : EXPECTED_BINARY_HEADERS.entrySet()) {
+ entry.getValue().forEach((value) -> outgoingHeaders.insert(entry.getKey(), value));
+ Assert.assertTrue(outgoingHeaders.containsKey(entry.getKey()));
+ }
+ for (final Map.Entry<String, List<String>> entry : EXPECTED_TEXT_HEADERS.entrySet()) {
+ entry.getValue().forEach((value) -> outgoingHeaders.insert(entry.getKey(), value));
+ Assert.assertTrue(outgoingHeaders.containsKey(entry.getKey()));
+ }
+ }
+
+ @Override
+ public void onHeadersReceived(CallHeaders incomingHeaders) {
+ factory.lastBinaryHeaders = new HashMap<>();
+ factory.lastTextHeaders = new HashMap<>();
+ incomingHeaders.keys().forEach(header -> {
+ if (header.endsWith("-bin")) {
+ final List<byte[]> values = new ArrayList<>();
+ incomingHeaders.getAllByte(header).forEach(values::add);
+ factory.lastBinaryHeaders.put(header, values);
+ } else {
+ final List<String> values = new ArrayList<>();
+ incomingHeaders.getAll(header).forEach(values::add);
+ factory.lastTextHeaders.put(header, values);
+ }
+ });
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {}
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDictionaryUtils.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDictionaryUtils.java
new file mode 100644
index 000000000..b5bf117c6
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDictionaryUtils.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.TreeSet;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Test cases for {@link DictionaryUtils}.
+ */
+public class TestDictionaryUtils {
+
+ @Test
+ public void testReuseSchema() {
+ FieldType varcharType = new FieldType(true, new ArrowType.Utf8(), null);
+ FieldType intType = new FieldType(true, new ArrowType.Int(32, true), null);
+
+ ImmutableList<Field> build = ImmutableList.of(
+ new Field("stringCol", varcharType, null),
+ new Field("intCol", intType, null));
+
+ Schema schema = new Schema(build);
+ Schema newSchema = DictionaryUtils.generateSchema(schema, null, new TreeSet<>());
+
+ // assert that no new schema is created.
+ assertTrue(schema == newSchema);
+ }
+
+ @Test
+ public void testCreateSchema() {
+ try (BufferAllocator allocator = new RootAllocator(1024)) {
+ DictionaryEncoding dictionaryEncoding =
+ new DictionaryEncoding(0, true, new ArrowType.Int(8, true));
+ VarCharVector dictVec = new VarCharVector("dict vector", allocator);
+ Dictionary dictionary = new Dictionary(dictVec, dictionaryEncoding);
+ DictionaryProvider dictProvider = new DictionaryProvider.MapDictionaryProvider(dictionary);
+ TreeSet<Long> dictionaryUsed = new TreeSet<>();
+
+ FieldType encodedVarcharType = new FieldType(true, new ArrowType.Int(8, true), dictionaryEncoding);
+ FieldType intType = new FieldType(true, new ArrowType.Int(32, true), null);
+
+ ImmutableList<Field> build = ImmutableList.of(
+ new Field("stringCol", encodedVarcharType, null),
+ new Field("intCol", intType, null));
+
+ Schema schema = new Schema(build);
+ Schema newSchema = DictionaryUtils.generateSchema(schema, dictProvider, dictionaryUsed);
+
+ // assert that a new schema is created.
+ assertTrue(schema != newSchema);
+
+ // assert the column is converted as expected
+ ArrowType newColType = newSchema.getFields().get(0).getType();
+ assertEquals(new ArrowType.Utf8(), newColType);
+
+ assertEquals(1, dictionaryUsed.size());
+ assertEquals(0, dictionaryUsed.first());
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDoExchange.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDoExchange.java
new file mode 100644
index 000000000..70394e11e
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDoExchange.java
@@ -0,0 +1,536 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.stream.IntStream;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDoExchange {
+ static byte[] EXCHANGE_DO_GET = "do-get".getBytes(StandardCharsets.UTF_8);
+ static byte[] EXCHANGE_DO_PUT = "do-put".getBytes(StandardCharsets.UTF_8);
+ static byte[] EXCHANGE_ECHO = "echo".getBytes(StandardCharsets.UTF_8);
+ static byte[] EXCHANGE_METADATA_ONLY = "only-metadata".getBytes(StandardCharsets.UTF_8);
+ static byte[] EXCHANGE_TRANSFORM = "transform".getBytes(StandardCharsets.UTF_8);
+ static byte[] EXCHANGE_CANCEL = "cancel".getBytes(StandardCharsets.UTF_8);
+
+ private BufferAllocator allocator;
+ private FlightServer server;
+ private FlightClient client;
+
+ @Before
+ public void setUp() throws Exception {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ final Location serverLocation = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, 0);
+ server = FlightServer.builder(allocator, serverLocation, new Producer(allocator)).build();
+ server.start();
+ final Location clientLocation = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, server.getPort());
+ client = FlightClient.builder(allocator, clientLocation).build();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ AutoCloseables.close(client, server, allocator);
+ }
+
+ /** Test a pure-metadata flow. */
+ @Test
+ public void testDoExchangeOnlyMetadata() throws Exception {
+ // Send a particular descriptor to the server and check for a particular response pattern.
+ try (final FlightClient.ExchangeReaderWriter stream =
+ client.doExchange(FlightDescriptor.command(EXCHANGE_METADATA_ONLY))) {
+ final FlightStream reader = stream.getReader();
+
+ // Server starts by sending a message without data (hence no VectorSchemaRoot should be present)
+ assertTrue(reader.next());
+ assertFalse(reader.hasRoot());
+ assertEquals(42, reader.getLatestMetadata().getInt(0));
+
+ // Write a metadata message to the server (without sending any data)
+ ArrowBuf buf = allocator.buffer(4);
+ buf.writeInt(84);
+ stream.getWriter().putMetadata(buf);
+
+ // Check that the server echoed the metadata back to us
+ assertTrue(reader.next());
+ assertFalse(reader.hasRoot());
+ assertEquals(84, reader.getLatestMetadata().getInt(0));
+
+ // Close our write channel and ensure the server also closes theirs
+ stream.getWriter().completed();
+ assertFalse(reader.next());
+ }
+ }
+
+ /** Emulate a DoGet with a DoExchange. */
+ @Test
+ public void testDoExchangeDoGet() throws Exception {
+ try (final FlightClient.ExchangeReaderWriter stream =
+ client.doExchange(FlightDescriptor.command(EXCHANGE_DO_GET))) {
+ final FlightStream reader = stream.getReader();
+ VectorSchemaRoot root = reader.getRoot();
+ IntVector iv = (IntVector) root.getVector("a");
+ int value = 0;
+ while (reader.next()) {
+ for (int i = 0; i < root.getRowCount(); i++) {
+ assertFalse(String.format("Row %d should not be null", value), iv.isNull(i));
+ assertEquals(value, iv.get(i));
+ value++;
+ }
+ }
+ assertEquals(100, value);
+ }
+ }
+
+ /** Emulate a DoPut with a DoExchange. */
+ @Test
+ public void testDoExchangeDoPut() throws Exception {
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true))));
+ try (final FlightClient.ExchangeReaderWriter stream =
+ client.doExchange(FlightDescriptor.command(EXCHANGE_DO_PUT));
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ IntVector iv = (IntVector) root.getVector("a");
+ iv.allocateNew();
+
+ stream.getWriter().start(root);
+ int counter = 0;
+ for (int i = 0; i < 10; i++) {
+ ValueVectorDataPopulator.setVector(iv, IntStream.range(0, i).boxed().toArray(Integer[]::new));
+ root.setRowCount(i);
+ counter += i;
+ stream.getWriter().putNext();
+
+ assertTrue(stream.getReader().next());
+ assertFalse(stream.getReader().hasRoot());
+ // For each write, the server sends back a metadata message containing the index of the last written batch
+ final ArrowBuf metadata = stream.getReader().getLatestMetadata();
+ assertEquals(counter, metadata.getInt(0));
+ }
+ stream.getWriter().completed();
+
+ while (stream.getReader().next()) {
+ // Drain the stream. Otherwise closing the stream sends a CANCEL which seriously screws with the server.
+ // CANCEL -> runs onCancel handler -> closes the FlightStream early
+ }
+ }
+ }
+
+ /** Test a DoExchange that echoes the client message. */
+ @Test
+ public void testDoExchangeEcho() throws Exception {
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true))));
+ try (final FlightClient.ExchangeReaderWriter stream = client.doExchange(FlightDescriptor.command(EXCHANGE_ECHO));
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ final FlightStream reader = stream.getReader();
+
+ // First try writing metadata without starting the Arrow data stream
+ ArrowBuf buf = allocator.buffer(4);
+ buf.writeInt(42);
+ stream.getWriter().putMetadata(buf);
+ buf = allocator.buffer(4);
+ buf.writeInt(84);
+ stream.getWriter().putMetadata(buf);
+
+ // Ensure that the server echoes the metadata back, also without starting its data stream
+ assertTrue(reader.next());
+ assertFalse(reader.hasRoot());
+ assertEquals(42, reader.getLatestMetadata().getInt(0));
+ assertTrue(reader.next());
+ assertFalse(reader.hasRoot());
+ assertEquals(84, reader.getLatestMetadata().getInt(0));
+
+ // Write data and check that it gets echoed back.
+ IntVector iv = (IntVector) root.getVector("a");
+ iv.allocateNew();
+ stream.getWriter().start(root);
+ for (int i = 0; i < 10; i++) {
+ iv.setSafe(0, i);
+ root.setRowCount(1);
+ stream.getWriter().putNext();
+
+ assertTrue(reader.next());
+ assertNull(reader.getLatestMetadata());
+ assertEquals(root.getSchema(), reader.getSchema());
+ assertEquals(i, ((IntVector) reader.getRoot().getVector("a")).get(0));
+ }
+
+ // Complete the stream so that the server knows not to expect any more messages from us.
+ stream.getWriter().completed();
+ // The server will end its side of the call, so this shouldn't block or indicate that
+ // there is more data.
+ assertFalse("We should not be waiting for any messages", reader.next());
+ }
+ }
+
+ /** Write some data, have it transformed, then read it back. */
+ @Test
+ public void testTransform() throws Exception {
+ final Schema schema = new Schema(Arrays.asList(
+ Field.nullable("a", new ArrowType.Int(32, true)),
+ Field.nullable("b", new ArrowType.Int(32, true))));
+ try (final FlightClient.ExchangeReaderWriter stream =
+ client.doExchange(FlightDescriptor.command(EXCHANGE_TRANSFORM))) {
+ // Write ten batches of data to the stream, where batch N contains N rows of data (N in [0, 10))
+ final FlightStream reader = stream.getReader();
+ final FlightClient.ClientStreamListener writer = stream.getWriter();
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ writer.start(root);
+ for (int batchIndex = 0; batchIndex < 10; batchIndex++) {
+ for (final FieldVector rawVec : root.getFieldVectors()) {
+ final IntVector vec = (IntVector) rawVec;
+ ValueVectorDataPopulator.setVector(vec, IntStream.range(0, batchIndex).boxed().toArray(Integer[]::new));
+ }
+ root.setRowCount(batchIndex);
+ writer.putNext();
+ }
+ }
+ // Indicate that we're done writing so that the server does not expect more data.
+ writer.completed();
+
+ // Read back data. We expect the server to double each value in each row of each batch.
+ assertEquals(schema, reader.getSchema());
+ final VectorSchemaRoot root = reader.getRoot();
+ for (int batchIndex = 0; batchIndex < 10; batchIndex++) {
+ assertTrue("Didn't receive batch #" + batchIndex, reader.next());
+ assertEquals(batchIndex, root.getRowCount());
+ for (final FieldVector rawVec : root.getFieldVectors()) {
+ final IntVector vec = (IntVector) rawVec;
+ for (int row = 0; row < batchIndex; row++) {
+ assertEquals(2 * row, vec.get(row));
+ }
+ }
+ }
+
+ // The server also sends back a metadata-only message containing the message count
+ assertTrue("There should be one extra message", reader.next());
+ assertEquals(10, reader.getLatestMetadata().getInt(0));
+ assertFalse("There should be no more data", reader.next());
+ }
+ }
+
+ /** Write some data, have it transformed, then read it back. Use the zero-copy optimization. */
+ @Test
+ public void testTransformZeroCopy() throws Exception {
+ final int rowsPerBatch = 4096;
+ final Schema schema = new Schema(Arrays.asList(
+ Field.nullable("a", new ArrowType.Int(32, true)),
+ Field.nullable("b", new ArrowType.Int(32, true))));
+ try (final FlightClient.ExchangeReaderWriter stream =
+ client.doExchange(FlightDescriptor.command(EXCHANGE_TRANSFORM))) {
+ // Write ten batches of data to the stream, where batch N contains 1024 rows of data (N in [0, 10))
+ final FlightStream reader = stream.getReader();
+ final FlightClient.ClientStreamListener writer = stream.getWriter();
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ writer.start(root);
+ // Enable the zero-copy optimization
+ writer.setUseZeroCopy(true);
+ for (int batchIndex = 0; batchIndex < 100; batchIndex++) {
+ for (final FieldVector rawVec : root.getFieldVectors()) {
+ final IntVector vec = (IntVector) rawVec;
+ for (int row = 0; row < rowsPerBatch; row++) {
+ // Use a value that'll be different per batch, so we can detect if we accidentally
+ // reuse a buffer (and overwrite a buffer that hasn't yet been sent over the network)
+ vec.setSafe(row, batchIndex + row);
+ }
+ }
+ root.setRowCount(rowsPerBatch);
+ writer.putNext();
+ // Allocate new buffers every time since we don't know if gRPC has written the buffer
+ // to the network yet
+ root.allocateNew();
+ }
+ }
+ // Indicate that we're done writing so that the server does not expect more data.
+ writer.completed();
+
+ // Read back data. We expect the server to double each value in each row of each batch.
+ assertEquals(schema, reader.getSchema());
+ final VectorSchemaRoot root = reader.getRoot();
+ for (int batchIndex = 0; batchIndex < 100; batchIndex++) {
+ assertTrue("Didn't receive batch #" + batchIndex, reader.next());
+ assertEquals(rowsPerBatch, root.getRowCount());
+ for (final FieldVector rawVec : root.getFieldVectors()) {
+ final IntVector vec = (IntVector) rawVec;
+ for (int row = 0; row < rowsPerBatch; row++) {
+ assertEquals(2 * (batchIndex + row), vec.get(row));
+ }
+ }
+ }
+
+ // The server also sends back a metadata-only message containing the message count
+ assertTrue("There should be one extra message", reader.next());
+ assertEquals(100, reader.getLatestMetadata().getInt(0));
+ assertFalse("There should be no more data", reader.next());
+ }
+ }
+
+ /** Have the server immediately cancel; ensure the client doesn't hang. */
+ @Test
+ public void testServerCancel() throws Exception {
+ try (final FlightClient.ExchangeReaderWriter stream =
+ client.doExchange(FlightDescriptor.command(EXCHANGE_CANCEL))) {
+ final FlightStream reader = stream.getReader();
+ final FlightClient.ClientStreamListener writer = stream.getWriter();
+
+ final FlightRuntimeException fre = assertThrows(FlightRuntimeException.class, reader::next);
+ assertEquals(FlightStatusCode.CANCELLED, fre.status().code());
+ assertEquals("expected", fre.status().description());
+
+ // Before, this would hang forever, because the writer checks if the stream is ready and not cancelled.
+ // However, the cancellation flag (was) only updated by reading, and the stream is never ready once the call ends.
+ // The test looks weird since normally, an application shouldn't try to write after the read fails. However,
+ // an application that isn't reading data wouldn't notice, and would instead get stuck on the write.
+ // Here, we read first to avoid a race condition in the test itself.
+ writer.putMetadata(allocator.getEmpty());
+ }
+ }
+
+ /** Have the server immediately cancel; ensure the server cleans up the FlightStream. */
+ @Test
+ public void testServerCancelLeak() throws Exception {
+ try (final FlightClient.ExchangeReaderWriter stream =
+ client.doExchange(FlightDescriptor.command(EXCHANGE_CANCEL))) {
+ final FlightStream reader = stream.getReader();
+ final FlightClient.ClientStreamListener writer = stream.getWriter();
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(Producer.SCHEMA, allocator)) {
+ writer.start(root);
+ final IntVector ints = (IntVector) root.getVector("a");
+ for (int i = 0; i < 128; i++) {
+ for (int row = 0; row < 1024; row++) {
+ ints.setSafe(row, row);
+ }
+ root.setRowCount(1024);
+ writer.putNext();
+ }
+ }
+
+ final FlightRuntimeException fre = assertThrows(FlightRuntimeException.class, reader::next);
+ assertEquals(FlightStatusCode.CANCELLED, fre.status().code());
+ assertEquals("expected", fre.status().description());
+ }
+ }
+
+ /** Have the client cancel without reading; ensure memory is not leaked. */
+ @Test
+ public void testClientCancel() throws Exception {
+ try (final FlightClient.ExchangeReaderWriter stream =
+ client.doExchange(FlightDescriptor.command(EXCHANGE_DO_GET))) {
+ final FlightStream reader = stream.getReader();
+ reader.cancel("", null);
+ // Cancel should be idempotent
+ reader.cancel("", null);
+ }
+ }
+
+ /** Have the client close the stream without reading; ensure memory is not leaked. */
+ @Test
+ public void testClientClose() throws Exception {
+ try (final FlightClient.ExchangeReaderWriter stream =
+ client.doExchange(FlightDescriptor.command(EXCHANGE_DO_GET))) {
+ assertEquals(Producer.SCHEMA, stream.getReader().getSchema());
+ }
+ // Intentionally leak the allocator in this test. gRPC has a bug where it does not wait for all calls to complete
+ // when shutting down the server, so this test will fail otherwise because it closes the allocator while the
+ // server-side call still has memory allocated.
+ // TODO(ARROW-9586): fix this once we track outstanding RPCs outside of gRPC.
+ // https://stackoverflow.com/questions/46716024/
+ allocator = null;
+ client = null;
+ }
+
+ static class Producer extends NoOpFlightProducer {
+ static final Schema SCHEMA = new Schema(
+ Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true))));
+ private final BufferAllocator allocator;
+
+ Producer(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ @Override
+ public void doExchange(CallContext context, FlightStream reader, ServerStreamListener writer) {
+ if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_METADATA_ONLY)) {
+ metadataOnly(context, reader, writer);
+ } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_DO_GET)) {
+ doGet(context, reader, writer);
+ } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_DO_PUT)) {
+ doPut(context, reader, writer);
+ } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_ECHO)) {
+ echo(context, reader, writer);
+ } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_TRANSFORM)) {
+ transform(context, reader, writer);
+ } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_CANCEL)) {
+ cancel(context, reader, writer);
+ } else {
+ writer.error(CallStatus.UNIMPLEMENTED.withDescription("Command not implemented").toRuntimeException());
+ }
+ }
+
+ /** Emulate DoGet. */
+ private void doGet(CallContext context, FlightStream reader, ServerStreamListener writer) {
+ try (VectorSchemaRoot root = VectorSchemaRoot.create(SCHEMA, allocator)) {
+ writer.start(root);
+ root.allocateNew();
+ IntVector iv = (IntVector) root.getVector("a");
+
+ for (int i = 0; i < 100; i += 2) {
+ iv.set(0, i);
+ iv.set(1, i + 1);
+ root.setRowCount(2);
+ writer.putNext();
+ }
+ }
+ writer.completed();
+ }
+
+ /** Emulate DoPut. */
+ private void doPut(CallContext context, FlightStream reader, ServerStreamListener writer) {
+ int counter = 0;
+ while (reader.next()) {
+ if (!reader.hasRoot()) {
+ writer.error(CallStatus.INVALID_ARGUMENT.withDescription("Message has no data").toRuntimeException());
+ return;
+ }
+ counter += reader.getRoot().getRowCount();
+
+ final ArrowBuf pong = allocator.buffer(4);
+ pong.writeInt(counter);
+ writer.putMetadata(pong);
+ }
+ writer.completed();
+ }
+
+ /** Exchange metadata without ever exchanging data. */
+ private void metadataOnly(CallContext context, FlightStream reader, ServerStreamListener writer) {
+ final ArrowBuf buf = allocator.buffer(4);
+ buf.writeInt(42);
+ writer.putMetadata(buf);
+ assertTrue(reader.next());
+ assertNotNull(reader.getLatestMetadata());
+ reader.getLatestMetadata().getReferenceManager().retain();
+ writer.putMetadata(reader.getLatestMetadata());
+ writer.completed();
+ }
+
+ /** Echo the client's response back to it. */
+ private void echo(CallContext context, FlightStream reader, ServerStreamListener writer) {
+ VectorSchemaRoot root = null;
+ VectorLoader loader = null;
+ while (reader.next()) {
+ if (reader.hasRoot()) {
+ if (root == null) {
+ root = VectorSchemaRoot.create(reader.getSchema(), allocator);
+ loader = new VectorLoader(root);
+ writer.start(root);
+ }
+ VectorUnloader unloader = new VectorUnloader(reader.getRoot());
+ try (final ArrowRecordBatch arb = unloader.getRecordBatch()) {
+ loader.load(arb);
+ }
+ if (reader.getLatestMetadata() != null) {
+ reader.getLatestMetadata().getReferenceManager().retain();
+ writer.putNext(reader.getLatestMetadata());
+ } else {
+ writer.putNext();
+ }
+ } else {
+ // Pure metadata
+ reader.getLatestMetadata().getReferenceManager().retain();
+ writer.putMetadata(reader.getLatestMetadata());
+ }
+ }
+ if (root != null) {
+ root.close();
+ }
+ writer.completed();
+ }
+
+ /** Accept a set of messages, then return some result. */
+ private void transform(CallContext context, FlightStream reader, ServerStreamListener writer) {
+ final Schema schema = reader.getSchema();
+ for (final Field field : schema.getFields()) {
+ if (!(field.getType() instanceof ArrowType.Int)) {
+ writer.error(CallStatus.INVALID_ARGUMENT.withDescription("Invalid type: " + field).toRuntimeException());
+ return;
+ }
+ final ArrowType.Int intType = (ArrowType.Int) field.getType();
+ if (!intType.getIsSigned() || intType.getBitWidth() != 32) {
+ writer.error(CallStatus.INVALID_ARGUMENT.withDescription("Must be i32: " + field).toRuntimeException());
+ return;
+ }
+ }
+ int batches = 0;
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ writer.start(root);
+ writer.setUseZeroCopy(true);
+ final VectorLoader loader = new VectorLoader(root);
+ final VectorUnloader unloader = new VectorUnloader(reader.getRoot());
+ while (reader.next()) {
+ try (final ArrowRecordBatch batch = unloader.getRecordBatch()) {
+ loader.load(batch);
+ }
+ batches++;
+ for (final FieldVector rawVec : root.getFieldVectors()) {
+ final IntVector vec = (IntVector) rawVec;
+ for (int i = 0; i < root.getRowCount(); i++) {
+ if (!vec.isNull(i)) {
+ vec.set(i, vec.get(i) * 2);
+ }
+ }
+ }
+ writer.putNext();
+ }
+ }
+ final ArrowBuf count = allocator.buffer(4);
+ count.writeInt(batches);
+ writer.putMetadata(count);
+ writer.completed();
+ }
+
+ /** Immediately cancel the call. */
+ private void cancel(CallContext context, FlightStream reader, ServerStreamListener writer) {
+ writer.error(CallStatus.CANCELLED.withDescription("expected").toRuntimeException());
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java
new file mode 100644
index 000000000..2c62bc7fa
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import org.apache.arrow.flight.perf.impl.PerfOuterClass;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.protobuf.Any;
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.google.rpc.Status;
+
+import io.grpc.Metadata;
+import io.grpc.StatusRuntimeException;
+import io.grpc.protobuf.ProtoUtils;
+import io.grpc.protobuf.StatusProto;
+
+public class TestErrorMetadata {
+ private static final Metadata.BinaryMarshaller<Status> marshaller =
+ ProtoUtils.metadataMarshaller(Status.getDefaultInstance());
+
+ /** Ensure metadata attached to a gRPC error is propagated. */
+ @Test
+ public void testGrpcMetadata() throws Exception {
+ PerfOuterClass.Perf perf = PerfOuterClass.Perf.newBuilder()
+ .setStreamCount(12)
+ .setRecordsPerBatch(1000)
+ .setRecordsPerStream(1000000L)
+ .build();
+ StatusRuntimeExceptionProducer producer = new StatusRuntimeExceptionProducer(perf);
+ try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ final FlightServer s =
+ FlightTestUtil.getStartedServer(
+ (location) -> {
+ return FlightServer.builder(allocator, location, producer).build();
+ });
+ final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+ final CallStatus flightStatus = FlightTestUtil.assertCode(FlightStatusCode.CANCELLED, () -> {
+ FlightStream stream = client.getStream(new Ticket("abs".getBytes()));
+ stream.next();
+ });
+ PerfOuterClass.Perf newPerf = null;
+ ErrorFlightMetadata metadata = flightStatus.metadata();
+ Assert.assertNotNull(metadata);
+ Assert.assertEquals(2, metadata.keys().size());
+ Assert.assertTrue(metadata.containsKey("grpc-status-details-bin"));
+ Status status = marshaller.parseBytes(metadata.getByte("grpc-status-details-bin"));
+ for (Any details : status.getDetailsList()) {
+ if (details.is(PerfOuterClass.Perf.class)) {
+ try {
+ newPerf = details.unpack(PerfOuterClass.Perf.class);
+ } catch (InvalidProtocolBufferException e) {
+ Assert.fail();
+ }
+ }
+ }
+ Assert.assertNotNull(newPerf);
+ Assert.assertEquals(perf, newPerf);
+ }
+ }
+
+ /** Ensure metadata attached to a Flight error is propagated. */
+ @Test
+ public void testFlightMetadata() throws Exception {
+ try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ final FlightServer s =
+ FlightTestUtil.getStartedServer(
+ (location) -> FlightServer.builder(allocator, location, new CallStatusProducer()).build());
+ final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+ CallStatus flightStatus = FlightTestUtil.assertCode(FlightStatusCode.INVALID_ARGUMENT, () -> {
+ FlightStream stream = client.getStream(new Ticket(new byte[0]));
+ stream.next();
+ });
+ ErrorFlightMetadata metadata = flightStatus.metadata();
+ Assert.assertNotNull(metadata);
+ Assert.assertEquals("foo", metadata.get("x-foo"));
+ Assert.assertArrayEquals(new byte[]{1}, metadata.getByte("x-bar-bin"));
+
+ flightStatus = FlightTestUtil.assertCode(FlightStatusCode.INVALID_ARGUMENT, () -> {
+ client.getInfo(FlightDescriptor.command(new byte[0]));
+ });
+ metadata = flightStatus.metadata();
+ Assert.assertNotNull(metadata);
+ Assert.assertEquals("foo", metadata.get("x-foo"));
+ Assert.assertArrayEquals(new byte[]{1}, metadata.getByte("x-bar-bin"));
+ }
+ }
+
+ private static class StatusRuntimeExceptionProducer extends NoOpFlightProducer {
+ private final PerfOuterClass.Perf perf;
+
+ private StatusRuntimeExceptionProducer(PerfOuterClass.Perf perf) {
+ this.perf = perf;
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ StatusRuntimeException sre = StatusProto.toStatusRuntimeException(Status.newBuilder()
+ .setCode(1)
+ .setMessage("Testing 1 2 3")
+ .addDetails(Any.pack(perf, "arrow/meta/types"))
+ .build());
+ listener.error(sre);
+ }
+ }
+
+ private static class CallStatusProducer extends NoOpFlightProducer {
+ ErrorFlightMetadata metadata;
+
+ CallStatusProducer() {
+ this.metadata = new ErrorFlightMetadata();
+ metadata.insert("x-foo", "foo");
+ metadata.insert("x-bar-bin", new byte[]{1});
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ listener.error(CallStatus.INVALID_ARGUMENT.withDescription("Failed").withMetadata(metadata).toRuntimeException());
+ }
+
+ @Override
+ public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) {
+ throw CallStatus.INVALID_ARGUMENT.withDescription("Failed").withMetadata(metadata).toRuntimeException();
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightClient.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightClient.java
new file mode 100644
index 000000000..30e351e94
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightClient.java
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.flight.FlightClient.ClientStreamListener;
+import org.apache.arrow.flight.TestBasicOperation.Producer;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+
+public class TestFlightClient {
+ /**
+ * ARROW-5063: make sure two clients to the same location can be closed independently.
+ */
+ @Test
+ public void independentShutdown() throws Exception {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final FlightServer server = FlightTestUtil.getStartedServer(
+ location -> FlightServer.builder(allocator, location,
+ new Producer(allocator)).build())) {
+ final Location location = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, server.getPort());
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true))));
+ try (final FlightClient client1 = FlightClient.builder(allocator, location).build();
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ // Use startPut as this ensures the RPC won't finish until we want it to
+ final ClientStreamListener listener = client1.startPut(FlightDescriptor.path("test"), root,
+ new AsyncPutListener());
+ try (final FlightClient client2 = FlightClient.builder(allocator, location).build()) {
+ client2.listActions().forEach(actionType -> Assert.assertNotNull(actionType.getType()));
+ }
+ listener.completed();
+ listener.getResult();
+ }
+ }
+ }
+
+ /**
+ * ARROW-5978: make sure that we can properly close a client/stream after requesting dictionaries.
+ */
+ @Ignore // Unfortunately this test is flaky in CI.
+ @Test
+ public void freeDictionaries() throws Exception {
+ final Schema expectedSchema = new Schema(Collections
+ .singletonList(new Field("encoded",
+ new FieldType(true, new ArrowType.Int(32, true), new DictionaryEncoding(1L, false, null)), null)));
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final BufferAllocator serverAllocator = allocator.newChildAllocator("flight-server", 0, Integer.MAX_VALUE);
+ final FlightServer server = FlightTestUtil.getStartedServer(
+ location -> FlightServer.builder(serverAllocator, location,
+ new DictionaryProducer(serverAllocator)).build())) {
+ final Location location = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, server.getPort());
+ try (final FlightClient client = FlightClient.builder(allocator, location).build()) {
+ try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) {
+ Assert.assertTrue(stream.next());
+ Assert.assertNotNull(stream.getDictionaryProvider().lookup(1));
+ final VectorSchemaRoot root = stream.getRoot();
+ Assert.assertEquals(expectedSchema, root.getSchema());
+ Assert.assertEquals(6, root.getVector("encoded").getValueCount());
+ try (final ValueVector decoded = DictionaryEncoder
+ .decode(root.getVector("encoded"), stream.getDictionaryProvider().lookup(1))) {
+ Assert.assertFalse(decoded.isNull(1));
+ Assert.assertTrue(decoded instanceof VarCharVector);
+ Assert.assertArrayEquals("one".getBytes(StandardCharsets.UTF_8), ((VarCharVector) decoded).get(1));
+ }
+ Assert.assertFalse(stream.next());
+ }
+ // Closing stream fails if it doesn't free dictionaries; closing dictionaries fails (refcount goes negative)
+ // if reference isn't retained in ArrowMessage
+ }
+ }
+ }
+
+ /**
+ * ARROW-5978: make sure that dictionary ownership can't be claimed twice.
+ */
+ @Ignore // Unfortunately this test is flaky in CI.
+ @Test
+ public void ownDictionaries() throws Exception {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final BufferAllocator serverAllocator = allocator.newChildAllocator("flight-server", 0, Integer.MAX_VALUE);
+ final FlightServer server = FlightTestUtil.getStartedServer(
+ location -> FlightServer.builder(serverAllocator, location,
+ new DictionaryProducer(serverAllocator)).build())) {
+ final Location location = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, server.getPort());
+ try (final FlightClient client = FlightClient.builder(allocator, location).build()) {
+ try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) {
+ Assert.assertTrue(stream.next());
+ Assert.assertFalse(stream.next());
+ final DictionaryProvider provider = stream.takeDictionaryOwnership();
+ Assertions.assertThrows(IllegalStateException.class, stream::takeDictionaryOwnership);
+ Assertions.assertThrows(IllegalStateException.class, stream::getDictionaryProvider);
+ DictionaryUtils.closeDictionaries(stream.getSchema(), provider);
+ }
+ }
+ }
+ }
+
+ /**
+ * ARROW-5978: make sure that dictionaries can be used after closing the stream.
+ */
+ @Ignore // Unfortunately this test is flaky in CI.
+ @Test
+ public void useDictionariesAfterClose() throws Exception {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final BufferAllocator serverAllocator = allocator.newChildAllocator("flight-server", 0, Integer.MAX_VALUE);
+ final FlightServer server = FlightTestUtil.getStartedServer(
+ location -> FlightServer.builder(serverAllocator, location, new DictionaryProducer(serverAllocator))
+ .build())) {
+ final Location location = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, server.getPort());
+ try (final FlightClient client = FlightClient.builder(allocator, location).build()) {
+ final VectorSchemaRoot root;
+ final DictionaryProvider provider;
+ try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) {
+ final VectorUnloader unloader = new VectorUnloader(stream.getRoot());
+ root = VectorSchemaRoot.create(stream.getSchema(), allocator);
+ final VectorLoader loader = new VectorLoader(root);
+ while (stream.next()) {
+ try (final ArrowRecordBatch arb = unloader.getRecordBatch()) {
+ loader.load(arb);
+ }
+ }
+ provider = stream.takeDictionaryOwnership();
+ }
+ try (final ValueVector decoded = DictionaryEncoder
+ .decode(root.getVector("encoded"), provider.lookup(1))) {
+ Assert.assertFalse(decoded.isNull(1));
+ Assert.assertTrue(decoded instanceof VarCharVector);
+ Assert.assertArrayEquals("one".getBytes(StandardCharsets.UTF_8), ((VarCharVector) decoded).get(1));
+ }
+ root.close();
+ DictionaryUtils.closeDictionaries(root.getSchema(), provider);
+ }
+ }
+ }
+
+ static class DictionaryProducer extends NoOpFlightProducer {
+
+ private final BufferAllocator allocator;
+
+ public DictionaryProducer(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ final byte[] zero = "zero".getBytes(StandardCharsets.UTF_8);
+ final byte[] one = "one".getBytes(StandardCharsets.UTF_8);
+ final byte[] two = "two".getBytes(StandardCharsets.UTF_8);
+ try (final VarCharVector dictionaryVector = newVarCharVector("dictionary", allocator)) {
+ final DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+
+ dictionaryVector.allocateNew(512, 3);
+ dictionaryVector.setSafe(0, zero, 0, zero.length);
+ dictionaryVector.setSafe(1, one, 0, one.length);
+ dictionaryVector.setSafe(2, two, 0, two.length);
+ dictionaryVector.setValueCount(3);
+
+ final Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ provider.put(dictionary);
+
+ final FieldVector encodedVector;
+ try (final VarCharVector unencoded = newVarCharVector("encoded", allocator)) {
+ unencoded.allocateNewSafe();
+ unencoded.set(1, one);
+ unencoded.set(2, two);
+ unencoded.set(3, zero);
+ unencoded.set(4, two);
+ unencoded.setValueCount(6);
+ encodedVector = (FieldVector) DictionaryEncoder.encode(unencoded, dictionary);
+ }
+
+ final List<Field> fields = Collections.singletonList(encodedVector.getField());
+ final List<FieldVector> vectors = Collections.singletonList(encodedVector);
+
+ try (final VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, encodedVector.getValueCount())) {
+ listener.start(root, provider);
+ listener.putNext();
+ listener.completed();
+ }
+ }
+ }
+
+ private static VarCharVector newVarCharVector(String name, BufferAllocator allocator) {
+ return (VarCharVector)
+ FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector(name, allocator, null);
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java
new file mode 100644
index 000000000..65ef12a8a
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import static org.junit.jupiter.api.Assertions.fail;
+
+import org.apache.arrow.flight.impl.Flight;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import io.grpc.stub.ServerCallStreamObserver;
+
+public class TestFlightService {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void setup() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void cleanup() throws Exception {
+ AutoCloseables.close(allocator);
+ }
+
+ @Test
+ public void testFlightServiceWithNoAuthHandlerOrInterceptors() {
+ // This test is for ARROW-10491. There was a bug where FlightService would try to access the RequestContext,
+ // but the RequestContext was getting set to null because no interceptors were active to initialize it
+ // when using FlightService directly rather than starting up a FlightServer.
+
+ // Arrange
+ final FlightProducer producer = new NoOpFlightProducer() {
+ @Override
+ public void getStream(CallContext context, Ticket ticket,
+ ServerStreamListener listener) {
+ listener.completed();
+ }
+ };
+
+ // This response observer notifies that the test failed if onError() is called.
+ final ServerCallStreamObserver<ArrowMessage> observer = new ServerCallStreamObserver<ArrowMessage>() {
+ @Override
+ public boolean isCancelled() {
+ return false;
+ }
+
+ @Override
+ public void setOnCancelHandler(Runnable runnable) {
+
+ }
+
+ @Override
+ public void setCompression(String s) {
+
+ }
+
+ @Override
+ public boolean isReady() {
+ return false;
+ }
+
+ @Override
+ public void setOnReadyHandler(Runnable runnable) {
+
+ }
+
+ @Override
+ public void disableAutoInboundFlowControl() {
+
+ }
+
+ @Override
+ public void request(int i) {
+
+ }
+
+ @Override
+ public void setMessageCompression(boolean b) {
+
+ }
+
+ @Override
+ public void onNext(ArrowMessage arrowMessage) {
+
+ }
+
+ @Override
+ public void onError(Throwable throwable) {
+ fail(throwable);
+ }
+
+ @Override
+ public void onCompleted() {
+
+ }
+ };
+ final FlightService flightService = new FlightService(allocator, producer, null, null);
+
+ // Act
+ flightService.doGetCustom(Flight.Ticket.newBuilder().build(), observer);
+
+ // fail() would have been called if an error happened during doGetCustom(), so this test passed.
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLargeMessage.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLargeMessage.java
new file mode 100644
index 000000000..629b6f5eb
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLargeMessage.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestLargeMessage {
+ /**
+ * Make sure a Flight client accepts large message payloads by default.
+ */
+ @Test
+ public void getLargeMessage() throws Exception {
+ try (final BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ final Producer producer = new Producer(a);
+ final FlightServer s =
+ FlightTestUtil.getStartedServer((location) -> FlightServer.builder(a, location, producer).build())) {
+
+ try (FlightClient client = FlightClient.builder(a, s.getLocation()).build()) {
+ try (FlightStream stream = client.getStream(new Ticket(new byte[]{}));
+ VectorSchemaRoot root = stream.getRoot()) {
+ while (stream.next()) {
+ for (final Field field : root.getSchema().getFields()) {
+ int value = 0;
+ final IntVector iv = (IntVector) root.getVector(field.getName());
+ for (int i = 0; i < root.getRowCount(); i++) {
+ Assert.assertEquals(value, iv.get(i));
+ value++;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Make sure a Flight server accepts large message payloads by default.
+ */
+ @Test
+ public void putLargeMessage() throws Exception {
+ try (final BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ final Producer producer = new Producer(a);
+ final FlightServer s =
+ FlightTestUtil.getStartedServer((location) -> FlightServer.builder(a, location, producer).build()
+ )) {
+
+ try (FlightClient client = FlightClient.builder(a, s.getLocation()).build();
+ BufferAllocator testAllocator = a.newChildAllocator("testcase", 0, Long.MAX_VALUE);
+ VectorSchemaRoot root = generateData(testAllocator)) {
+ final FlightClient.ClientStreamListener listener = client.startPut(FlightDescriptor.path("hello"), root,
+ new AsyncPutListener());
+ listener.putNext();
+ listener.completed();
+ listener.getResult();
+ }
+ }
+ }
+
+ private static VectorSchemaRoot generateData(BufferAllocator allocator) {
+ final int size = 128 * 1024;
+ final List<String> fieldNames = Arrays.asList("c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8", "c9", "c10");
+ final Stream<Field> fields = fieldNames
+ .stream()
+ .map(fieldName -> new Field(fieldName, FieldType.nullable(new ArrowType.Int(32, true)), null));
+ final Schema schema = new Schema(fields::iterator, null);
+
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
+ root.allocateNew();
+ for (final String fieldName : fieldNames) {
+ final IntVector iv = (IntVector) root.getVector(fieldName);
+ iv.setValueCount(size);
+ for (int i = 0; i < size; i++) {
+ iv.set(i, i);
+ }
+ }
+ root.setRowCount(size);
+ return root;
+ }
+
+ private static class Producer implements FlightProducer, AutoCloseable {
+ private final BufferAllocator allocator;
+
+ Producer(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket,
+ ServerStreamListener listener) {
+ try (VectorSchemaRoot root = generateData(allocator)) {
+ listener.start(root);
+ listener.putNext();
+ listener.completed();
+ }
+ }
+
+ @Override
+ public void listFlights(CallContext context, Criteria criteria,
+ StreamListener<FlightInfo> listener) {
+
+ }
+
+ @Override
+ public FlightInfo getFlightInfo(CallContext context,
+ FlightDescriptor descriptor) {
+ return null;
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context, FlightStream flightStream, StreamListener<PutResult> ackStream) {
+ return () -> {
+ try (VectorSchemaRoot root = flightStream.getRoot()) {
+ while (flightStream.next()) {
+ ;
+ }
+ }
+ };
+ }
+
+ @Override
+ public void doAction(CallContext context, Action action,
+ StreamListener<Result> listener) {
+ listener.onCompleted();
+ }
+
+ @Override
+ public void listActions(CallContext context,
+ StreamListener<ActionType> listener) {
+
+ }
+
+ @Override
+ public void close() throws Exception {
+ allocator.close();
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLeak.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLeak.java
new file mode 100644
index 000000000..6e2870499
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLeak.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Arrays;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+/**
+ * Tests for scenarios where Flight could leak memory.
+ */
+public class TestLeak {
+
+ private static final int ROWS = 2048;
+
+ private static Schema getSchema() {
+ return new Schema(Arrays.asList(
+ Field.nullable("0", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("1", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("2", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("3", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("4", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("5", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("6", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("7", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("8", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("9", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ Field.nullable("10", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))
+ ));
+ }
+
+ /**
+ * Ensure that if the client cancels, the server does not leak memory.
+ *
+ * <p>In gRPC, canceling the stream from the client sends an event to the server. Once processed, gRPC will start
+ * silently rejecting messages sent by the server. However, Flight depends on gRPC processing these messages in order
+ * to free the associated memory.
+ */
+ @Test
+ public void testCancelingDoGetDoesNotLeak() throws Exception {
+ final CountDownLatch callFinished = new CountDownLatch(1);
+ try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ final FlightServer s =
+ FlightTestUtil.getStartedServer(
+ (location) -> FlightServer.builder(allocator, location, new LeakFlightProducer(allocator, callFinished))
+ .build());
+ final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+
+ final FlightStream stream = client.getStream(new Ticket(new byte[0]));
+ stream.getRoot();
+ stream.cancel("Cancel", null);
+
+ // Wait for the call to finish. (Closing the allocator while a call is ongoing is a guaranteed leak.)
+ callFinished.await(60, TimeUnit.SECONDS);
+
+ s.shutdown();
+ s.awaitTermination();
+ }
+ }
+
+ @Test
+ public void testCancelingDoPutDoesNotBlock() throws Exception {
+ final CountDownLatch callFinished = new CountDownLatch(1);
+ try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ final FlightServer s =
+ FlightTestUtil.getStartedServer(
+ (location) -> FlightServer.builder(allocator, location, new LeakFlightProducer(allocator, callFinished))
+ .build());
+ final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) {
+
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(getSchema(), allocator)) {
+ final FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]);
+ final SyncPutListener listener = new SyncPutListener();
+ final FlightClient.ClientStreamListener stream = client.startPut(descriptor, root, listener);
+ // Wait for the server to cancel
+ callFinished.await(60, TimeUnit.SECONDS);
+
+ for (int col = 0; col < 11; col++) {
+ final Float8Vector vector = (Float8Vector) root.getVector(Integer.toString(col));
+ vector.allocateNew();
+ for (int row = 0; row < ROWS; row++) {
+ vector.setSafe(row, 10.);
+ }
+ }
+ root.setRowCount(ROWS);
+ // Unlike DoGet, this method fairly reliably will write the message to the stream, so even without the fix
+ // for ARROW-7343, this won't leak memory.
+ // However, it will block if FlightClient doesn't check for cancellation.
+ stream.putNext();
+ stream.completed();
+ }
+
+ s.shutdown();
+ s.awaitTermination();
+ }
+ }
+
+ /**
+ * A FlightProducer that always produces a fixed data stream with metadata on the side.
+ */
+ private static class LeakFlightProducer extends NoOpFlightProducer {
+
+ private final BufferAllocator allocator;
+ private final CountDownLatch callFinished;
+
+ public LeakFlightProducer(BufferAllocator allocator, CountDownLatch callFinished) {
+ this.allocator = allocator;
+ this.callFinished = callFinished;
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ BufferAllocator childAllocator = allocator.newChildAllocator("foo", 0, Long.MAX_VALUE);
+ VectorSchemaRoot root = VectorSchemaRoot.create(TestLeak.getSchema(), childAllocator);
+ root.allocateNew();
+ listener.start(root);
+
+ // We can't poll listener#isCancelled since gRPC has two distinct "is cancelled" flags.
+ // TODO: should we continue leaking gRPC semantics? Can we even avoid this?
+ listener.setOnCancelHandler(() -> {
+ try {
+ for (int col = 0; col < 11; col++) {
+ final Float8Vector vector = (Float8Vector) root.getVector(Integer.toString(col));
+ vector.allocateNew();
+ for (int row = 0; row < ROWS; row++) {
+ vector.setSafe(row, 10.);
+ }
+ }
+ root.setRowCount(ROWS);
+ // Once the call is "really cancelled" (setOnCancelListener has run/is running), this call is actually a
+ // no-op on the gRPC side and will leak the ArrowMessage unless Flight checks for this.
+ listener.putNext();
+ listener.completed();
+ } finally {
+ try {
+ root.close();
+ childAllocator.close();
+ } finally {
+ // Don't let the test hang if we throw above
+ callFinished.countDown();
+ }
+ }
+ });
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context,
+ FlightStream flightStream, StreamListener<PutResult> ackStream) {
+ return () -> {
+ flightStream.getRoot();
+ ackStream.onError(CallStatus.CANCELLED.withDescription("CANCELLED").toRuntimeException());
+ callFinished.countDown();
+ ackStream.onCompleted();
+ };
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestMetadataVersion.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestMetadataVersion.java
new file mode 100644
index 000000000..83a694bf3
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestMetadataVersion.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Test clients/servers with different metadata versions.
+ */
+public class TestMetadataVersion {
+ private static BufferAllocator allocator;
+ private static Schema schema;
+ private static IpcOption optionV4;
+ private static IpcOption optionV5;
+ private static Schema unionSchema;
+
+ @BeforeClass
+ public static void setUpClass() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ schema = new Schema(Collections.singletonList(Field.nullable("foo", new ArrowType.Int(32, true))));
+ unionSchema = new Schema(
+ Collections.singletonList(Field.nullable("union", new ArrowType.Union(UnionMode.Dense, new int[]{0}))));
+
+ // avoid writing legacy ipc format by default
+ optionV4 = new IpcOption(false, MetadataVersion.V4);
+ optionV5 = IpcOption.DEFAULT;
+ }
+
+ @AfterClass
+ public static void tearDownClass() {
+ allocator.close();
+ }
+
+ @Test
+ public void testGetFlightInfoV4() throws Exception {
+ try (final FlightServer server = startServer(optionV4);
+ final FlightClient client = connect(server)) {
+ final FlightInfo result = client.getInfo(FlightDescriptor.command(new byte[0]));
+ assertEquals(schema, result.getSchema());
+ }
+ }
+
+ @Test
+ public void testGetSchemaV4() throws Exception {
+ try (final FlightServer server = startServer(optionV4);
+ final FlightClient client = connect(server)) {
+ final SchemaResult result = client.getSchema(FlightDescriptor.command(new byte[0]));
+ assertEquals(schema, result.getSchema());
+ }
+ }
+
+ @Test
+ public void testUnionCheck() throws Exception {
+ assertThrows(IllegalArgumentException.class, () -> new SchemaResult(unionSchema, optionV4));
+ assertThrows(IllegalArgumentException.class, () ->
+ new FlightInfo(unionSchema, FlightDescriptor.command(new byte[0]), Collections.emptyList(), -1, -1, optionV4));
+ try (final FlightServer server = startServer(optionV4);
+ final FlightClient client = connect(server);
+ final FlightStream stream = client.getStream(new Ticket("union".getBytes(StandardCharsets.UTF_8)))) {
+ final FlightRuntimeException err = assertThrows(FlightRuntimeException.class, stream::next);
+ assertTrue(err.getMessage(), err.getMessage().contains("Cannot write union with V4 metadata"));
+ }
+
+ try (final FlightServer server = startServer(optionV4);
+ final FlightClient client = connect(server);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(unionSchema, allocator)) {
+ final FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]);
+ final SyncPutListener reader = new SyncPutListener();
+ final FlightClient.ClientStreamListener listener = client.startPut(descriptor, reader);
+ final IllegalArgumentException err = assertThrows(IllegalArgumentException.class,
+ () -> listener.start(root, null, optionV4));
+ assertTrue(err.getMessage(), err.getMessage().contains("Cannot write union with V4 metadata"));
+ }
+ }
+
+ @Test
+ public void testPutV4() throws Exception {
+ try (final FlightServer server = startServer(optionV4);
+ final FlightClient client = connect(server);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ generateData(root);
+ final FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]);
+ final SyncPutListener reader = new SyncPutListener();
+ final FlightClient.ClientStreamListener listener = client.startPut(descriptor, reader);
+ listener.start(root, null, optionV4);
+ listener.putNext();
+ listener.completed();
+ listener.getResult();
+ }
+ }
+
+ @Test
+ public void testGetV4() throws Exception {
+ try (final FlightServer server = startServer(optionV4);
+ final FlightClient client = connect(server);
+ final FlightStream stream = client.getStream(new Ticket(new byte[0]))) {
+ assertTrue(stream.next());
+ assertEquals(optionV4.metadataVersion, stream.metadataVersion);
+ validateRoot(stream.getRoot());
+ assertFalse(stream.next());
+ }
+ }
+
+ @Test
+ public void testExchangeV4ToV5() throws Exception {
+ try (final FlightServer server = startServer(optionV5);
+ final FlightClient client = connect(server);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
+ final FlightClient.ExchangeReaderWriter stream = client.doExchange(FlightDescriptor.command(new byte[0]))) {
+ stream.getWriter().start(root, null, optionV4);
+ generateData(root);
+ stream.getWriter().putNext();
+ stream.getWriter().completed();
+ assertTrue(stream.getReader().next());
+ assertEquals(optionV5.metadataVersion, stream.getReader().metadataVersion);
+ validateRoot(stream.getReader().getRoot());
+ assertFalse(stream.getReader().next());
+ }
+ }
+
+ @Test
+ public void testExchangeV5ToV4() throws Exception {
+ try (final FlightServer server = startServer(optionV4);
+ final FlightClient client = connect(server);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
+ final FlightClient.ExchangeReaderWriter stream = client.doExchange(FlightDescriptor.command(new byte[0]))) {
+ stream.getWriter().start(root, null, optionV5);
+ generateData(root);
+ stream.getWriter().putNext();
+ stream.getWriter().completed();
+ assertTrue(stream.getReader().next());
+ assertEquals(optionV4.metadataVersion, stream.getReader().metadataVersion);
+ validateRoot(stream.getReader().getRoot());
+ assertFalse(stream.getReader().next());
+ }
+ }
+
+ @Test
+ public void testExchangeV4ToV4() throws Exception {
+ try (final FlightServer server = startServer(optionV4);
+ final FlightClient client = connect(server);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
+ final FlightClient.ExchangeReaderWriter stream = client.doExchange(FlightDescriptor.command(new byte[0]))) {
+ stream.getWriter().start(root, null, optionV4);
+ generateData(root);
+ stream.getWriter().putNext();
+ stream.getWriter().completed();
+ assertTrue(stream.getReader().next());
+ assertEquals(optionV4.metadataVersion, stream.getReader().metadataVersion);
+ validateRoot(stream.getReader().getRoot());
+ assertFalse(stream.getReader().next());
+ }
+ }
+
+ private static void generateData(VectorSchemaRoot root) {
+ assertEquals(schema, root.getSchema());
+ final IntVector vector = (IntVector) root.getVector("foo");
+ vector.setSafe(0, 0);
+ vector.setSafe(1, 1);
+ vector.setSafe(2, 4);
+ root.setRowCount(3);
+ }
+
+ private static void validateRoot(VectorSchemaRoot root) {
+ assertEquals(schema, root.getSchema());
+ assertEquals(3, root.getRowCount());
+ final IntVector vector = (IntVector) root.getVector("foo");
+ assertEquals(0, vector.get(0));
+ assertEquals(1, vector.get(1));
+ assertEquals(4, vector.get(2));
+ }
+
+ FlightServer startServer(IpcOption option) throws Exception {
+ Location location = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, 0);
+ VersionFlightProducer producer = new VersionFlightProducer(allocator, option);
+ final FlightServer server = FlightServer.builder(allocator, location, producer).build();
+ server.start();
+ return server;
+ }
+
+ FlightClient connect(FlightServer server) {
+ Location location = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, server.getPort());
+ return FlightClient.builder(allocator, location).build();
+ }
+
+ static final class VersionFlightProducer extends NoOpFlightProducer {
+ private final BufferAllocator allocator;
+ private final IpcOption option;
+
+ VersionFlightProducer(BufferAllocator allocator, IpcOption option) {
+ this.allocator = allocator;
+ this.option = option;
+ }
+
+ @Override
+ public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) {
+ return new FlightInfo(schema, descriptor, Collections.emptyList(), -1, -1, option);
+ }
+
+ @Override
+ public SchemaResult getSchema(CallContext context, FlightDescriptor descriptor) {
+ return new SchemaResult(schema, option);
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ if (Arrays.equals("union".getBytes(StandardCharsets.UTF_8), ticket.getBytes())) {
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(unionSchema, allocator)) {
+ listener.start(root, null, option);
+ } catch (IllegalArgumentException e) {
+ listener.error(CallStatus.INTERNAL.withCause(e).withDescription(e.getMessage()).toRuntimeException());
+ return;
+ }
+ listener.error(CallStatus.INTERNAL.withDescription("Expected exception not raised").toRuntimeException());
+ return;
+ }
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ listener.start(root, null, option);
+ generateData(root);
+ listener.putNext();
+ listener.completed();
+ }
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context, FlightStream flightStream, StreamListener<PutResult> ackStream) {
+ return () -> {
+ try {
+ assertTrue(flightStream.next());
+ assertEquals(option.metadataVersion, flightStream.metadataVersion);
+ validateRoot(flightStream.getRoot());
+ } catch (AssertionError err) {
+ // gRPC doesn't propagate stack traces across the wire.
+ err.printStackTrace();
+ ackStream.onError(CallStatus.INVALID_ARGUMENT
+ .withCause(err)
+ .withDescription("Server assertion failed: " + err)
+ .toRuntimeException());
+ return;
+ } catch (RuntimeException err) {
+ err.printStackTrace();
+ ackStream.onError(CallStatus.INTERNAL
+ .withCause(err)
+ .withDescription("Server assertion failed: " + err)
+ .toRuntimeException());
+ return;
+ }
+ ackStream.onCompleted();
+ };
+ }
+
+ @Override
+ public void doExchange(CallContext context, FlightStream reader, ServerStreamListener writer) {
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ try {
+ assertTrue(reader.next());
+ validateRoot(reader.getRoot());
+ assertFalse(reader.next());
+ } catch (AssertionError err) {
+ // gRPC doesn't propagate stack traces across the wire.
+ err.printStackTrace();
+ writer.error(CallStatus.INVALID_ARGUMENT
+ .withCause(err)
+ .withDescription("Server assertion failed: " + err)
+ .toRuntimeException());
+ return;
+ } catch (RuntimeException err) {
+ err.printStackTrace();
+ writer.error(CallStatus.INTERNAL
+ .withCause(err)
+ .withDescription("Server assertion failed: " + err)
+ .toRuntimeException());
+ return;
+ }
+
+ writer.start(root, null, option);
+ generateData(root);
+ writer.putNext();
+ writer.completed();
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerMiddleware.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerMiddleware.java
new file mode 100644
index 000000000..1f3e35ca3
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerMiddleware.java
@@ -0,0 +1,360 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.function.BiConsumer;
+
+import org.apache.arrow.flight.FlightClient.ClientStreamListener;
+import org.apache.arrow.flight.FlightServerMiddleware.Factory;
+import org.apache.arrow.flight.FlightServerMiddleware.Key;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class TestServerMiddleware {
+
+ private static final RuntimeException EXPECTED_EXCEPTION = new RuntimeException("test");
+
+ /**
+ * Make sure errors in DoPut are intercepted.
+ */
+ @Test
+ public void doPutErrors() {
+ test(
+ new ErrorProducer(EXPECTED_EXCEPTION),
+ (allocator, client) -> {
+ final FlightDescriptor descriptor = FlightDescriptor.path("test");
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(new Schema(Collections.emptyList()), allocator)) {
+ final ClientStreamListener listener = client.startPut(descriptor, root, new SyncPutListener());
+ listener.completed();
+ FlightTestUtil.assertCode(FlightStatusCode.INTERNAL, listener::getResult);
+ }
+ }, (recorder) -> {
+ final CallStatus status = recorder.statusFuture.get();
+ Assert.assertNotNull(status);
+ Assert.assertNotNull(status.cause());
+ Assert.assertEquals(FlightStatusCode.INTERNAL, status.code());
+ });
+ // Check the status after server shutdown (to make sure gRPC finishes pending calls on the server side)
+ }
+
+ /**
+ * Make sure custom error codes in DoPut are intercepted.
+ */
+ @Test
+ public void doPutCustomCode() {
+ test(
+ new ErrorProducer(CallStatus.UNAVAILABLE.withDescription("description").toRuntimeException()),
+ (allocator, client) -> {
+ final FlightDescriptor descriptor = FlightDescriptor.path("test");
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(new Schema(Collections.emptyList()), allocator)) {
+ final ClientStreamListener listener = client.startPut(descriptor, root, new SyncPutListener());
+ listener.completed();
+ FlightTestUtil.assertCode(FlightStatusCode.UNAVAILABLE, listener::getResult);
+ }
+ }, (recorder) -> {
+ final CallStatus status = recorder.statusFuture.get();
+ Assert.assertNotNull(status);
+ Assert.assertNull(status.cause());
+ Assert.assertEquals(FlightStatusCode.UNAVAILABLE, status.code());
+ Assert.assertEquals("description", status.description());
+ });
+ }
+
+ /**
+ * Make sure uncaught exceptions in DoPut are intercepted.
+ */
+ @Test
+ public void doPutUncaught() {
+ test(new ServerErrorProducer(EXPECTED_EXCEPTION),
+ (allocator, client) -> {
+ final FlightDescriptor descriptor = FlightDescriptor.path("test");
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(new Schema(Collections.emptyList()), allocator)) {
+ final ClientStreamListener listener = client.startPut(descriptor, root, new SyncPutListener());
+ listener.completed();
+ listener.getResult();
+ }
+ }, (recorder) -> {
+ final CallStatus status = recorder.statusFuture.get();
+ final Throwable err = recorder.errFuture.get();
+ Assert.assertNotNull(status);
+ Assert.assertEquals(FlightStatusCode.OK, status.code());
+ Assert.assertNull(status.cause());
+ Assert.assertNotNull(err);
+ Assert.assertEquals(EXPECTED_EXCEPTION.getMessage(), err.getMessage());
+ });
+ }
+
+ @Test
+ public void listFlightsUncaught() {
+ test(new ServerErrorProducer(EXPECTED_EXCEPTION),
+ (allocator, client) -> client.listFlights(new Criteria(new byte[0])).forEach((action) -> {
+ }), (recorder) -> {
+ final CallStatus status = recorder.statusFuture.get();
+ final Throwable err = recorder.errFuture.get();
+ Assert.assertNotNull(status);
+ Assert.assertEquals(FlightStatusCode.OK, status.code());
+ Assert.assertNull(status.cause());
+ Assert.assertNotNull(err);
+ Assert.assertEquals(EXPECTED_EXCEPTION.getMessage(), err.getMessage());
+ });
+ }
+
+ @Test
+ public void doActionUncaught() {
+ test(new ServerErrorProducer(EXPECTED_EXCEPTION),
+ (allocator, client) -> client.doAction(new Action("test")).forEachRemaining(result -> {
+ }), (recorder) -> {
+ final CallStatus status = recorder.statusFuture.get();
+ final Throwable err = recorder.errFuture.get();
+ Assert.assertNotNull(status);
+ Assert.assertEquals(FlightStatusCode.OK, status.code());
+ Assert.assertNull(status.cause());
+ Assert.assertNotNull(err);
+ Assert.assertEquals(EXPECTED_EXCEPTION.getMessage(), err.getMessage());
+ });
+ }
+
+ @Test
+ public void listActionsUncaught() {
+ test(new ServerErrorProducer(EXPECTED_EXCEPTION),
+ (allocator, client) -> client.listActions().forEach(result -> {
+ }), (recorder) -> {
+ final CallStatus status = recorder.statusFuture.get();
+ final Throwable err = recorder.errFuture.get();
+ Assert.assertNotNull(status);
+ Assert.assertEquals(FlightStatusCode.OK, status.code());
+ Assert.assertNull(status.cause());
+ Assert.assertNotNull(err);
+ Assert.assertEquals(EXPECTED_EXCEPTION.getMessage(), err.getMessage());
+ });
+ }
+
+ @Test
+ public void getFlightInfoUncaught() {
+ test(new ServerErrorProducer(EXPECTED_EXCEPTION),
+ (allocator, client) -> {
+ FlightTestUtil.assertCode(FlightStatusCode.INTERNAL, () -> client.getInfo(FlightDescriptor.path("test")));
+ }, (recorder) -> {
+ final CallStatus status = recorder.statusFuture.get();
+ Assert.assertNotNull(status);
+ Assert.assertEquals(FlightStatusCode.INTERNAL, status.code());
+ Assert.assertNotNull(status.cause());
+ Assert.assertEquals(EXPECTED_EXCEPTION.getMessage(), status.cause().getMessage());
+ });
+ }
+
+ @Test
+ public void doGetUncaught() {
+ test(new ServerErrorProducer(EXPECTED_EXCEPTION),
+ (allocator, client) -> {
+ try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) {
+ while (stream.next()) {
+ }
+ } catch (Exception e) {
+ Assert.fail(e.toString());
+ }
+ }, (recorder) -> {
+ final CallStatus status = recorder.statusFuture.get();
+ final Throwable err = recorder.errFuture.get();
+ Assert.assertNotNull(status);
+ Assert.assertEquals(FlightStatusCode.OK, status.code());
+ Assert.assertNull(status.cause());
+ Assert.assertNotNull(err);
+ Assert.assertEquals(EXPECTED_EXCEPTION.getMessage(), err.getMessage());
+ });
+ }
+
+ /**
+ * A middleware that records the last error on any call.
+ */
+ static class ErrorRecorder implements FlightServerMiddleware {
+
+ CompletableFuture<CallStatus> statusFuture = new CompletableFuture<>();
+ CompletableFuture<Throwable> errFuture = new CompletableFuture<>();
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+ statusFuture.complete(status);
+ }
+
+ @Override
+ public void onCallErrored(Throwable err) {
+ errFuture.complete(err);
+ }
+
+ static class Factory implements FlightServerMiddleware.Factory<ErrorRecorder> {
+
+ ErrorRecorder instance = new ErrorRecorder();
+
+ @Override
+ public ErrorRecorder onCallStarted(CallInfo info, CallHeaders incomingHeaders, RequestContext context) {
+ return instance;
+ }
+ }
+ }
+
+ /**
+ * A producer that throws the given exception on a call.
+ */
+ static class ErrorProducer extends NoOpFlightProducer {
+
+ final RuntimeException error;
+
+ ErrorProducer(RuntimeException t) {
+ error = t;
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context, FlightStream flightStream, StreamListener<PutResult> ackStream) {
+ return () -> {
+ // Drain queue to avoid FlightStream#close cancelling the call
+ while (flightStream.next()) {
+ }
+ throw error;
+ };
+ }
+ }
+
+ /**
+ * A producer that throws the given exception on a call, but only after sending a success to the client.
+ */
+ static class ServerErrorProducer extends NoOpFlightProducer {
+
+ final RuntimeException error;
+
+ ServerErrorProducer(RuntimeException t) {
+ error = t;
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(new Schema(Collections.emptyList()), allocator)) {
+ listener.start(root);
+ listener.completed();
+ }
+ throw error;
+ }
+
+ @Override
+ public void listFlights(CallContext context, Criteria criteria, StreamListener<FlightInfo> listener) {
+ listener.onCompleted();
+ throw error;
+ }
+
+ @Override
+ public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) {
+ throw error;
+ }
+
+ @Override
+ public Runnable acceptPut(CallContext context, FlightStream flightStream, StreamListener<PutResult> ackStream) {
+ return () -> {
+ while (flightStream.next()) {
+ }
+ ackStream.onCompleted();
+ throw error;
+ };
+ }
+
+ @Override
+ public void doAction(CallContext context, Action action, StreamListener<Result> listener) {
+ listener.onCompleted();
+ throw error;
+ }
+
+ @Override
+ public void listActions(CallContext context, StreamListener<ActionType> listener) {
+ listener.onCompleted();
+ throw error;
+ }
+ }
+
+ static class ServerMiddlewarePair<T extends FlightServerMiddleware> {
+
+ final FlightServerMiddleware.Key<T> key;
+ final FlightServerMiddleware.Factory<T> factory;
+
+ ServerMiddlewarePair(Key<T> key, Factory<T> factory) {
+ this.key = key;
+ this.factory = factory;
+ }
+ }
+
+ /**
+ * Spin up a service with the given middleware and producer.
+ *
+ * @param producer The Flight producer to use.
+ * @param middleware A list of middleware to register.
+ * @param body A function to run as the body of the test.
+ * @param <T> The middleware type.
+ */
+ static <T extends FlightServerMiddleware> void test(FlightProducer producer, List<ServerMiddlewarePair<T>> middleware,
+ BiConsumer<BufferAllocator, FlightClient> body) {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) {
+ final FlightServer server = FlightTestUtil
+ .getStartedServer(location -> {
+ final FlightServer.Builder builder = FlightServer.builder(allocator, location, producer);
+ middleware.forEach(pair -> builder.middleware(pair.key, pair.factory));
+ return builder.build();
+ });
+ try (final FlightServer ignored = server;
+ final FlightClient client = FlightClient.builder(allocator, server.getLocation()).build()
+ ) {
+ body.accept(allocator, client);
+ }
+ } catch (InterruptedException | IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ static void test(FlightProducer producer, BiConsumer<BufferAllocator, FlightClient> body,
+ ErrorConsumer<ErrorRecorder> verify) {
+ final ErrorRecorder.Factory factory = new ErrorRecorder.Factory();
+ final List<ServerMiddlewarePair<ErrorRecorder>> middleware = Collections
+ .singletonList(new ServerMiddlewarePair<>(Key.of("m"), factory));
+ test(producer, middleware, (allocator, client) -> {
+ body.accept(allocator, client);
+ try {
+ verify.accept(factory.instance);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ @FunctionalInterface
+ interface ErrorConsumer<T> {
+ void accept(T obj) throws Exception;
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerOptions.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerOptions.java
new file mode 100644
index 000000000..363ad443e
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerOptions.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.TestBasicOperation.Producer;
+import org.apache.arrow.flight.auth.ServerAuthHandler;
+import org.apache.arrow.flight.impl.FlightServiceGrpc;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import io.grpc.MethodDescriptor;
+import io.grpc.ServerServiceDefinition;
+import io.grpc.netty.NettyServerBuilder;
+
+@RunWith(JUnit4.class)
+public class TestServerOptions {
+
+ @Test
+ public void builderConsumer() throws Exception {
+ final AtomicBoolean consumerCalled = new AtomicBoolean();
+ final Consumer<NettyServerBuilder> consumer = (builder) -> consumerCalled.set(true);
+
+ try (
+ BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ Producer producer = new Producer(a);
+ FlightServer s =
+ FlightTestUtil.getStartedServer(
+ (location) -> FlightServer.builder(a, location, producer)
+ .transportHint("grpc.builderConsumer", consumer).build()
+ )) {
+ Assert.assertTrue(consumerCalled.get());
+ }
+ }
+
+ /**
+ * Make sure that if Flight supplies a default executor to gRPC, then it is closed along with the server.
+ */
+ @Test
+ public void defaultExecutorClosed() throws Exception {
+ final ExecutorService executor;
+ try (
+ BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ FlightServer server =
+ FlightTestUtil.getStartedServer(
+ (location) -> FlightServer.builder(a, location, new NoOpFlightProducer())
+ .build()
+ )) {
+ assertNotNull(server.grpcExecutor);
+ executor = server.grpcExecutor;
+ }
+ Assert.assertTrue(executor.isShutdown());
+ }
+
+ /**
+ * Make sure that if the user provides an executor to gRPC, then Flight does not close it.
+ */
+ @Test
+ public void suppliedExecutorNotClosed() throws Exception {
+ final ExecutorService executor = Executors.newSingleThreadExecutor();
+ try {
+ try (
+ BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ FlightServer server =
+ FlightTestUtil.getStartedServer(
+ (location) -> FlightServer.builder(a, location, new NoOpFlightProducer())
+ .executor(executor)
+ .build()
+ )) {
+ Assert.assertNull(server.grpcExecutor);
+ }
+ Assert.assertFalse(executor.isShutdown());
+ } finally {
+ executor.shutdown();
+ }
+ }
+
+ @Test
+ public void domainSocket() throws Exception {
+ Assume.assumeTrue("We have a native transport available", FlightTestUtil.isNativeTransportAvailable());
+ final File domainSocket = File.createTempFile("flight-unit-test-", ".sock");
+ Assert.assertTrue(domainSocket.delete());
+ // Domain socket paths have a platform-dependent limit. Set a conservative limit and skip the test if the temporary
+ // file name is too long. (We do not assume a particular platform-dependent temporary directory path.)
+ Assume.assumeTrue("The domain socket path is not too long", domainSocket.getAbsolutePath().length() < 100);
+ final Location location = Location.forGrpcDomainSocket(domainSocket.getAbsolutePath());
+ try (
+ BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ Producer producer = new Producer(a);
+ FlightServer s =
+ FlightTestUtil.getStartedServer(
+ (port) -> FlightServer.builder(a, location, producer).build()
+ )) {
+ try (FlightClient c = FlightClient.builder(a, location).build()) {
+ try (FlightStream stream = c.getStream(new Ticket(new byte[0]))) {
+ VectorSchemaRoot root = stream.getRoot();
+ IntVector iv = (IntVector) root.getVector("c1");
+ int value = 0;
+ while (stream.next()) {
+ for (int i = 0; i < root.getRowCount(); i++) {
+ Assert.assertEquals(value, iv.get(i));
+ value++;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void checkReflectionMetadata() {
+ // This metadata is needed for gRPC reflection to work.
+ final ExecutorService executorService = Executors.newSingleThreadExecutor();
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) {
+ final FlightBindingService service = new FlightBindingService(allocator, new NoOpFlightProducer(),
+ ServerAuthHandler.NO_OP, executorService);
+ final ServerServiceDefinition definition = service.bindService();
+ assertEquals(FlightServiceGrpc.getServiceDescriptor().getSchemaDescriptor(),
+ definition.getServiceDescriptor().getSchemaDescriptor());
+
+ final Map<String, MethodDescriptor<?, ?>> definedMethods = new HashMap<>();
+ final Map<String, MethodDescriptor<?, ?>> serviceMethods = new HashMap<>();
+
+ // Make sure that the reflection metadata object is identical across all the places where it's accessible
+ definition.getMethods().forEach(
+ method -> definedMethods.put(method.getMethodDescriptor().getFullMethodName(), method.getMethodDescriptor()));
+ definition.getServiceDescriptor().getMethods().forEach(
+ method -> serviceMethods.put(method.getFullMethodName(), method));
+
+ for (final MethodDescriptor<?, ?> descriptor : FlightServiceGrpc.getServiceDescriptor().getMethods()) {
+ final String methodName = descriptor.getFullMethodName();
+ Assert.assertTrue("Method is missing from ServerServiceDefinition: " + methodName,
+ definedMethods.containsKey(methodName));
+ Assert.assertTrue("Method is missing from ServiceDescriptor: " + methodName,
+ definedMethods.containsKey(methodName));
+
+ assertEquals(descriptor.getSchemaDescriptor(), definedMethods.get(methodName).getSchemaDescriptor());
+ assertEquals(descriptor.getSchemaDescriptor(), serviceMethods.get(methodName).getSchemaDescriptor());
+ }
+ } finally {
+ executorService.shutdown();
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestTls.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestTls.java
new file mode 100644
index 000000000..c5cd871e2
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestTls.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Iterator;
+import java.util.function.Consumer;
+
+import org.apache.arrow.flight.FlightClient.Builder;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests for TLS in Flight.
+ */
+public class TestTls {
+
+ /**
+ * Test a basic request over TLS.
+ */
+ @Test
+ public void connectTls() {
+ test((builder) -> {
+ try (final InputStream roots = new FileInputStream(FlightTestUtil.exampleTlsRootCert().toFile());
+ final FlightClient client = builder.trustedCertificates(roots).build()) {
+ final Iterator<Result> responses = client.doAction(new Action("hello-world"));
+ final byte[] response = responses.next().getBody();
+ Assert.assertEquals("Hello, world!", new String(response, StandardCharsets.UTF_8));
+ Assert.assertFalse(responses.hasNext());
+ } catch (InterruptedException | IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ /**
+ * Make sure that connections are rejected when the root certificate isn't trusted.
+ */
+ @Test
+ public void rejectInvalidCert() {
+ test((builder) -> {
+ try (final FlightClient client = builder.build()) {
+ final Iterator<Result> responses = client.doAction(new Action("hello-world"));
+ FlightTestUtil.assertCode(FlightStatusCode.UNAVAILABLE, () -> responses.next().getBody());
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ /**
+ * Make sure that connections are rejected when the hostname doesn't match.
+ */
+ @Test
+ public void rejectHostname() {
+ test((builder) -> {
+ try (final InputStream roots = new FileInputStream(FlightTestUtil.exampleTlsRootCert().toFile());
+ final FlightClient client = builder.trustedCertificates(roots).overrideHostname("fakehostname")
+ .build()) {
+ final Iterator<Result> responses = client.doAction(new Action("hello-world"));
+ FlightTestUtil.assertCode(FlightStatusCode.UNAVAILABLE, () -> responses.next().getBody());
+ } catch (InterruptedException | IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ /**
+ * Test a basic request over TLS.
+ */
+ @Test
+ public void connectTlsDisableServerVerification() {
+ test((builder) -> {
+ try (final FlightClient client = builder.verifyServer(false).build()) {
+ final Iterator<Result> responses = client.doAction(new Action("hello-world"));
+ final byte[] response = responses.next().getBody();
+ Assert.assertEquals("Hello, world!", new String(response, StandardCharsets.UTF_8));
+ Assert.assertFalse(responses.hasNext());
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ void test(Consumer<Builder> testFn) {
+ final FlightTestUtil.CertKeyPair certKey = FlightTestUtil.exampleTlsCerts().get(0);
+ try (
+ BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ Producer producer = new Producer();
+ FlightServer s =
+ FlightTestUtil.getStartedServer(
+ (location) -> {
+ try {
+ return FlightServer.builder(a, location, producer)
+ .useTls(certKey.cert, certKey.key)
+ .build();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ })) {
+ final Builder builder = FlightClient.builder(a, Location.forGrpcTls(FlightTestUtil.LOCALHOST, s.getPort()));
+ testFn.accept(builder);
+ } catch (InterruptedException | IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ static class Producer extends NoOpFlightProducer implements AutoCloseable {
+
+ @Override
+ public void doAction(CallContext context, Action action, StreamListener<Result> listener) {
+ if (action.getType().equals("hello-world")) {
+ listener.onNext(new Result("Hello, world!".getBytes(StandardCharsets.UTF_8)));
+ listener.onCompleted();
+ return;
+ }
+ listener
+ .onError(CallStatus.UNIMPLEMENTED.withDescription("Invalid action " + action.getType()).toRuntimeException());
+ }
+
+ @Override
+ public void close() {
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java
new file mode 100644
index 000000000..c18f5709b
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Optional;
+
+import org.apache.arrow.flight.Criteria;
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.FlightStatusCode;
+import org.apache.arrow.flight.FlightStream;
+import org.apache.arrow.flight.FlightTestUtil;
+import org.apache.arrow.flight.NoOpFlightProducer;
+import org.apache.arrow.flight.Ticket;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+public class TestBasicAuth {
+
+ private static final String USERNAME = "flight";
+ private static final String PASSWORD = "woohoo";
+ private static final byte[] VALID_TOKEN = "my_token".getBytes(StandardCharsets.UTF_8);
+
+ private FlightClient client;
+ private FlightServer server;
+ private BufferAllocator allocator;
+
+ @Test
+ public void validAuth() {
+ client.authenticateBasic(USERNAME, PASSWORD);
+ Assert.assertTrue(ImmutableList.copyOf(client.listFlights(Criteria.ALL)).size() == 0);
+ }
+
+ // ARROW-7722: this test occasionally leaks memory
+ @Ignore
+ @Test
+ public void asyncCall() throws Exception {
+ client.authenticateBasic(USERNAME, PASSWORD);
+ client.listFlights(Criteria.ALL);
+ try (final FlightStream s = client.getStream(new Ticket(new byte[1]))) {
+ while (s.next()) {
+ Assert.assertEquals(4095, s.getRoot().getRowCount());
+ }
+ }
+ }
+
+ @Test
+ public void invalidAuth() {
+ FlightTestUtil.assertCode(FlightStatusCode.UNAUTHENTICATED, () -> {
+ client.authenticateBasic(USERNAME, "WRONG");
+ });
+
+ FlightTestUtil.assertCode(FlightStatusCode.UNAUTHENTICATED, () -> {
+ client.listFlights(Criteria.ALL).forEach(action -> Assert.fail());
+ });
+ }
+
+ @Test
+ public void didntAuth() {
+ FlightTestUtil.assertCode(FlightStatusCode.UNAUTHENTICATED, () -> {
+ client.listFlights(Criteria.ALL).forEach(action -> Assert.fail());
+ });
+ }
+
+ @Before
+ public void setup() throws IOException {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ final BasicServerAuthHandler.BasicAuthValidator validator = new BasicServerAuthHandler.BasicAuthValidator() {
+
+ @Override
+ public Optional<String> isValid(byte[] token) {
+ if (Arrays.equals(token, VALID_TOKEN)) {
+ return Optional.of(USERNAME);
+ }
+ return Optional.empty();
+ }
+
+ @Override
+ public byte[] getToken(String username, String password) {
+ if (USERNAME.equals(username) && PASSWORD.equals(password)) {
+ return VALID_TOKEN;
+ } else {
+ throw new IllegalArgumentException("invalid credentials");
+ }
+ }
+ };
+
+ server = FlightTestUtil.getStartedServer((location) -> FlightServer.builder(
+ allocator,
+ location,
+ new NoOpFlightProducer() {
+ @Override
+ public void listFlights(CallContext context, Criteria criteria,
+ StreamListener<FlightInfo> listener) {
+ if (!context.peerIdentity().equals(USERNAME)) {
+ listener.onError(new IllegalArgumentException("Invalid username"));
+ return;
+ }
+ listener.onCompleted();
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ if (!context.peerIdentity().equals(USERNAME)) {
+ listener.error(new IllegalArgumentException("Invalid username"));
+ return;
+ }
+ final Schema pojoSchema = new Schema(ImmutableList.of(Field.nullable("a",
+ Types.MinorType.BIGINT.getType())));
+ try (VectorSchemaRoot root = VectorSchemaRoot.create(pojoSchema, allocator)) {
+ listener.start(root);
+ root.allocateNew();
+ root.setRowCount(4095);
+ listener.putNext();
+ listener.completed();
+ }
+ }
+ }).authHandler(new BasicServerAuthHandler(validator)).build());
+ client = FlightClient.builder(allocator, server.getLocation()).build();
+ }
+
+ @After
+ public void shutdown() throws Exception {
+ AutoCloseables.close(client, server, allocator);
+ }
+
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java
new file mode 100644
index 000000000..9bec32f1b
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.auth2;
+
+import java.io.IOException;
+
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.Criteria;
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightProducer;
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.FlightStatusCode;
+import org.apache.arrow.flight.FlightStream;
+import org.apache.arrow.flight.FlightTestUtil;
+import org.apache.arrow.flight.NoOpFlightProducer;
+import org.apache.arrow.flight.Ticket;
+import org.apache.arrow.flight.grpc.CredentialCallOption;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableList;
+
+public class TestBasicAuth2 {
+
+ private static final String USERNAME_1 = "flight1";
+ private static final String USERNAME_2 = "flight2";
+ private static final String NO_USERNAME = "";
+ private static final String PASSWORD_1 = "woohoo1";
+ private static final String PASSWORD_2 = "woohoo2";
+ private BufferAllocator allocator;
+ private FlightServer server;
+ private FlightClient client;
+ private FlightClient client2;
+
+ @Before
+ public void setup() throws Exception {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ startServerAndClient();
+ }
+
+ private FlightProducer getFlightProducer() {
+ return new NoOpFlightProducer() {
+ @Override
+ public void listFlights(CallContext context, Criteria criteria,
+ StreamListener<FlightInfo> listener) {
+ if (!context.peerIdentity().equals(USERNAME_1) && !context.peerIdentity().equals(USERNAME_2)) {
+ listener.onError(new IllegalArgumentException("Invalid username"));
+ return;
+ }
+ listener.onCompleted();
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) {
+ if (!context.peerIdentity().equals(USERNAME_1) && !context.peerIdentity().equals(USERNAME_2)) {
+ listener.error(new IllegalArgumentException("Invalid username"));
+ return;
+ }
+ final Schema pojoSchema = new Schema(ImmutableList.of(Field.nullable("a",
+ Types.MinorType.BIGINT.getType())));
+ try (VectorSchemaRoot root = VectorSchemaRoot.create(pojoSchema, allocator)) {
+ listener.start(root);
+ root.allocateNew();
+ root.setRowCount(4095);
+ listener.putNext();
+ listener.completed();
+ }
+ }
+ };
+ }
+
+ private void startServerAndClient() throws IOException {
+ final FlightProducer flightProducer = getFlightProducer();
+ this.server = FlightTestUtil.getStartedServer((location) -> FlightServer
+ .builder(allocator, location, flightProducer)
+ .headerAuthenticator(new GeneratedBearerTokenAuthenticator(
+ new BasicCallHeaderAuthenticator(this::validate)))
+ .build());
+
+ this.client = FlightClient.builder(allocator, server.getLocation())
+ .build();
+ }
+
+ @After
+ public void shutdown() throws Exception {
+ AutoCloseables.close(client, client2, server, allocator);
+ client = null;
+ client2 = null;
+ server = null;
+ allocator = null;
+ }
+
+ private void startClient2() throws IOException {
+ client2 = FlightClient.builder(allocator, server.getLocation())
+ .build();
+ }
+
+ private CallHeaderAuthenticator.AuthResult validate(String username, String password) {
+ if (Strings.isNullOrEmpty(username)) {
+ throw CallStatus.UNAUTHENTICATED.withDescription("Credentials not supplied.").toRuntimeException();
+ }
+ final String identity;
+ if (USERNAME_1.equals(username) && PASSWORD_1.equals(password)) {
+ identity = USERNAME_1;
+ } else if (USERNAME_2.equals(username) && PASSWORD_2.equals(password)) {
+ identity = USERNAME_2;
+ } else {
+ throw CallStatus.UNAUTHENTICATED.withDescription("Username or password is invalid.").toRuntimeException();
+ }
+ return () -> identity;
+ }
+
+ @Test
+ public void validAuthWithBearerAuthServer() throws IOException {
+ testValidAuth(client);
+ }
+
+ @Test
+ public void validAuthWithMultipleClientsWithSameCredentialsWithBearerAuthServer() throws IOException {
+ startClient2();
+ testValidAuthWithMultipleClientsWithSameCredentials(client, client2);
+ }
+
+ @Test
+ public void validAuthWithMultipleClientsWithDifferentCredentialsWithBearerAuthServer() throws IOException {
+ startClient2();
+ testValidAuthWithMultipleClientsWithDifferentCredentials(client, client2);
+ }
+
+ // ARROW-7722: this test occasionally leaks memory
+ @Ignore
+ @Test
+ public void asyncCall() throws Exception {
+ final CredentialCallOption bearerToken = client
+ .authenticateBasicToken(USERNAME_1, PASSWORD_1).get();
+ client.listFlights(Criteria.ALL, bearerToken);
+ try (final FlightStream s = client.getStream(new Ticket(new byte[1]))) {
+ while (s.next()) {
+ Assert.assertEquals(4095, s.getRoot().getRowCount());
+ }
+ }
+ }
+
+ @Test
+ public void invalidAuthWithBearerAuthServer() throws IOException {
+ testInvalidAuth(client);
+ }
+
+ @Test
+ public void didntAuthWithBearerAuthServer() throws IOException {
+ didntAuth(client);
+ }
+
+ private void testValidAuth(FlightClient client) {
+ final CredentialCallOption bearerToken = client
+ .authenticateBasicToken(USERNAME_1, PASSWORD_1).get();
+ Assert.assertTrue(ImmutableList.copyOf(client
+ .listFlights(Criteria.ALL, bearerToken))
+ .isEmpty());
+ }
+
+ private void testValidAuthWithMultipleClientsWithSameCredentials(
+ FlightClient client1, FlightClient client2) {
+ final CredentialCallOption bearerToken1 = client1
+ .authenticateBasicToken(USERNAME_1, PASSWORD_1).get();
+ final CredentialCallOption bearerToken2 = client2
+ .authenticateBasicToken(USERNAME_1, PASSWORD_1).get();
+ Assert.assertTrue(ImmutableList.copyOf(client1
+ .listFlights(Criteria.ALL, bearerToken1))
+ .isEmpty());
+ Assert.assertTrue(ImmutableList.copyOf(client2
+ .listFlights(Criteria.ALL, bearerToken2))
+ .isEmpty());
+ }
+
+ private void testValidAuthWithMultipleClientsWithDifferentCredentials(
+ FlightClient client1, FlightClient client2) {
+ final CredentialCallOption bearerToken1 = client1
+ .authenticateBasicToken(USERNAME_1, PASSWORD_1).get();
+ final CredentialCallOption bearerToken2 = client2
+ .authenticateBasicToken(USERNAME_2, PASSWORD_2).get();
+ Assert.assertTrue(ImmutableList.copyOf(client1
+ .listFlights(Criteria.ALL, bearerToken1))
+ .isEmpty());
+ Assert.assertTrue(ImmutableList.copyOf(client2
+ .listFlights(Criteria.ALL, bearerToken2))
+ .isEmpty());
+ }
+
+ private void testInvalidAuth(FlightClient client) {
+ FlightTestUtil.assertCode(FlightStatusCode.UNAUTHENTICATED, () ->
+ client.authenticateBasicToken(USERNAME_1, "WRONG"));
+
+ FlightTestUtil.assertCode(FlightStatusCode.UNAUTHENTICATED, () ->
+ client.authenticateBasicToken(NO_USERNAME, PASSWORD_1));
+
+ FlightTestUtil.assertCode(FlightStatusCode.UNAUTHENTICATED, () ->
+ client.listFlights(Criteria.ALL).forEach(action -> Assert.fail()));
+ }
+
+ private void didntAuth(FlightClient client) {
+ FlightTestUtil.assertCode(FlightStatusCode.UNAUTHENTICATED, () ->
+ client.listFlights(Criteria.ALL).forEach(action -> Assert.fail()));
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/TestCookieHandling.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/TestCookieHandling.java
new file mode 100644
index 000000000..f205f9a3b
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/TestCookieHandling.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.client;
+
+import java.io.IOException;
+
+import org.apache.arrow.flight.CallHeaders;
+import org.apache.arrow.flight.CallInfo;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.Criteria;
+import org.apache.arrow.flight.ErrorFlightMetadata;
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightMethod;
+import org.apache.arrow.flight.FlightProducer;
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.FlightServerMiddleware;
+import org.apache.arrow.flight.FlightTestUtil;
+import org.apache.arrow.flight.NoOpFlightProducer;
+import org.apache.arrow.flight.RequestContext;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Tests for correct handling of cookies from the FlightClient using {@link ClientCookieMiddleware}.
+ */
+public class TestCookieHandling {
+ private static final String SET_COOKIE_HEADER = "Set-Cookie";
+ private static final String COOKIE_HEADER = "Cookie";
+ private BufferAllocator allocator;
+ private FlightServer server;
+ private FlightClient client;
+
+ private ClientCookieMiddlewareTestFactory testFactory = new ClientCookieMiddlewareTestFactory();
+ private ClientCookieMiddleware cookieMiddleware = new ClientCookieMiddleware(testFactory);
+
+ @Before
+ public void setup() throws Exception {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ startServerAndClient();
+ }
+
+ @After
+ public void cleanup() throws Exception {
+ testFactory = new ClientCookieMiddlewareTestFactory();
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ AutoCloseables.close(client, server, allocator);
+ client = null;
+ server = null;
+ allocator = null;
+ }
+
+ @Test
+ public void basicCookie() {
+ CallHeaders headersToSend = new ErrorFlightMetadata();
+ headersToSend.insert(SET_COOKIE_HEADER, "k=v");
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ Assert.assertEquals("k=v", cookieMiddleware.getValidCookiesAsString());
+ }
+
+ @Test
+ public void cookieStaysAfterMultipleRequests() {
+ CallHeaders headersToSend = new ErrorFlightMetadata();
+ headersToSend.insert(SET_COOKIE_HEADER, "k=v");
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ Assert.assertEquals("k=v", cookieMiddleware.getValidCookiesAsString());
+
+ headersToSend = new ErrorFlightMetadata();
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ Assert.assertEquals("k=v", cookieMiddleware.getValidCookiesAsString());
+
+ headersToSend = new ErrorFlightMetadata();
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ Assert.assertEquals("k=v", cookieMiddleware.getValidCookiesAsString());
+ }
+
+ @Ignore
+ @Test
+ public void cookieAutoExpires() {
+ CallHeaders headersToSend = new ErrorFlightMetadata();
+ headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=2");
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ // Note: using max-age changes cookie version from 0->1, which quotes values.
+ Assert.assertEquals("k=\"v\"", cookieMiddleware.getValidCookiesAsString());
+
+ headersToSend = new ErrorFlightMetadata();
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ Assert.assertEquals("k=\"v\"", cookieMiddleware.getValidCookiesAsString());
+
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException ignored) {
+ }
+
+ // Verify that the k cookie was discarded because it expired.
+ Assert.assertTrue(cookieMiddleware.getValidCookiesAsString().isEmpty());
+ }
+
+ @Test
+ public void cookieExplicitlyExpires() {
+ CallHeaders headersToSend = new ErrorFlightMetadata();
+ headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=2");
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ // Note: using max-age changes cookie version from 0->1, which quotes values.
+ Assert.assertEquals("k=\"v\"", cookieMiddleware.getValidCookiesAsString());
+
+ // Note: The JDK treats Max-Age < 0 as not expired and treats 0 as expired.
+ // This violates the RFC, which states that less than zero and zero should both be expired.
+ headersToSend = new ErrorFlightMetadata();
+ headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=0");
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+
+ // Verify that the k cookie was discarded because the server told the client it is expired.
+ Assert.assertTrue(cookieMiddleware.getValidCookiesAsString().isEmpty());
+ }
+
+ @Ignore
+ @Test
+ public void cookieExplicitlyExpiresWithMaxAgeMinusOne() {
+ CallHeaders headersToSend = new ErrorFlightMetadata();
+ headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=2");
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ // Note: using max-age changes cookie version from 0->1, which quotes values.
+ Assert.assertEquals("k=\"v\"", cookieMiddleware.getValidCookiesAsString());
+
+ headersToSend = new ErrorFlightMetadata();
+
+ // The Java HttpCookie class has a bug where it uses a -1 maxAge to indicate
+ // a persistent cookie, when the RFC spec says this should mean the cookie expires immediately.
+ headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=-1");
+ cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION));
+ cookieMiddleware.onHeadersReceived(headersToSend);
+
+ // Verify that the k cookie was discarded because the server told the client it is expired.
+ Assert.assertTrue(cookieMiddleware.getValidCookiesAsString().isEmpty());
+ }
+
+ @Test
+ public void changeCookieValue() {
+ CallHeaders headersToSend = new ErrorFlightMetadata();
+ headersToSend.insert(SET_COOKIE_HEADER, "k=v");
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ Assert.assertEquals("k=v", cookieMiddleware.getValidCookiesAsString());
+
+ headersToSend = new ErrorFlightMetadata();
+ headersToSend.insert(SET_COOKIE_HEADER, "k=v2");
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ Assert.assertEquals("k=v2", cookieMiddleware.getValidCookiesAsString());
+ }
+
+ @Test
+ public void multipleCookiesWithSetCookie() {
+ CallHeaders headersToSend = new ErrorFlightMetadata();
+ headersToSend.insert(SET_COOKIE_HEADER, "firstKey=firstVal");
+ headersToSend.insert(SET_COOKIE_HEADER, "secondKey=secondVal");
+ cookieMiddleware.onHeadersReceived(headersToSend);
+ Assert.assertEquals("firstKey=firstVal; secondKey=secondVal", cookieMiddleware.getValidCookiesAsString());
+ }
+
+ @Test
+ public void cookieStaysAfterMultipleRequestsEndToEnd() {
+ client.handshake();
+ Assert.assertEquals("k=v", testFactory.clientCookieMiddleware.getValidCookiesAsString());
+ client.handshake();
+ Assert.assertEquals("k=v", testFactory.clientCookieMiddleware.getValidCookiesAsString());
+ client.listFlights(Criteria.ALL);
+ Assert.assertEquals("k=v", testFactory.clientCookieMiddleware.getValidCookiesAsString());
+ }
+
+ /**
+ * A server middleware component that injects SET_COOKIE_HEADER into the outgoing headers.
+ */
+ static class SetCookieHeaderInjector implements FlightServerMiddleware {
+ private final Factory factory;
+
+ public SetCookieHeaderInjector(Factory factory) {
+ this.factory = factory;
+ }
+
+ @Override
+ public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {
+ if (!factory.receivedCookieHeader) {
+ outgoingHeaders.insert(SET_COOKIE_HEADER, "k=v");
+ }
+ }
+
+ @Override
+ public void onCallCompleted(CallStatus status) {
+
+ }
+
+ @Override
+ public void onCallErrored(Throwable err) {
+
+ }
+
+ static class Factory implements FlightServerMiddleware.Factory<SetCookieHeaderInjector> {
+ private boolean receivedCookieHeader = false;
+
+ @Override
+ public SetCookieHeaderInjector onCallStarted(CallInfo info, CallHeaders incomingHeaders,
+ RequestContext context) {
+ receivedCookieHeader = null != incomingHeaders.get(COOKIE_HEADER);
+ return new SetCookieHeaderInjector(this);
+ }
+ }
+ }
+
+ public static class ClientCookieMiddlewareTestFactory extends ClientCookieMiddleware.Factory {
+
+ private ClientCookieMiddleware clientCookieMiddleware;
+
+ @Override
+ public ClientCookieMiddleware onCallStarted(CallInfo info) {
+ this.clientCookieMiddleware = new ClientCookieMiddleware(this);
+ return this.clientCookieMiddleware;
+ }
+ }
+
+ private void startServerAndClient() throws IOException {
+ final FlightProducer flightProducer = new NoOpFlightProducer() {
+ public void listFlights(CallContext context, Criteria criteria,
+ StreamListener<FlightInfo> listener) {
+ listener.onCompleted();
+ }
+ };
+
+ this.server = FlightTestUtil.getStartedServer((location) -> FlightServer
+ .builder(allocator, location, flightProducer)
+ .middleware(FlightServerMiddleware.Key.of("test"), new SetCookieHeaderInjector.Factory())
+ .build());
+
+ this.client = FlightClient.builder(allocator, server.getLocation())
+ .intercept(testFactory)
+ .build();
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/example/TestExampleServer.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/example/TestExampleServer.java
new file mode 100644
index 000000000..fb157f45e
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/example/TestExampleServer.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.example;
+
+import java.io.IOException;
+
+import org.apache.arrow.flight.AsyncPutListener;
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightClient.ClientStreamListener;
+import org.apache.arrow.flight.FlightDescriptor;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightStream;
+import org.apache.arrow.flight.FlightTestUtil;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Ensure that example server supports get and put.
+ */
+public class TestExampleServer {
+
+ private BufferAllocator allocator;
+ private BufferAllocator caseAllocator;
+ private ExampleFlightServer server;
+ private FlightClient client;
+
+ @Before
+ public void start() throws IOException {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+
+ Location l = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, 12233);
+ if (!Boolean.getBoolean("disableServer")) {
+ System.out.println("Starting server.");
+ server = new ExampleFlightServer(allocator, l);
+ server.start();
+ } else {
+ System.out.println("Skipping server startup.");
+ }
+ client = FlightClient.builder(allocator, l).build();
+ caseAllocator = allocator.newChildAllocator("test-case", 0, Long.MAX_VALUE);
+ }
+
+ @After
+ public void after() throws Exception {
+ AutoCloseables.close(server, client, caseAllocator, allocator);
+ }
+
+ @Test
+ @Ignore
+ public void putStream() {
+ BufferAllocator a = caseAllocator;
+ final int size = 10;
+
+ IntVector iv = new IntVector("c1", a);
+
+ VectorSchemaRoot root = VectorSchemaRoot.of(iv);
+ ClientStreamListener listener = client.startPut(FlightDescriptor.path("hello"), root,
+ new AsyncPutListener());
+
+ //batch 1
+ root.allocateNew();
+ for (int i = 0; i < size; i++) {
+ iv.set(i, i);
+ }
+ iv.setValueCount(size);
+ root.setRowCount(size);
+ listener.putNext();
+
+ // batch 2
+
+ root.allocateNew();
+ for (int i = 0; i < size; i++) {
+ iv.set(i, i + size);
+ }
+ iv.setValueCount(size);
+ root.setRowCount(size);
+ listener.putNext();
+ root.clear();
+ listener.completed();
+
+ // wait for ack to avoid memory leaks.
+ listener.getResult();
+
+ FlightInfo info = client.getInfo(FlightDescriptor.path("hello"));
+ try (final FlightStream stream = client.getStream(info.getEndpoints().get(0).getTicket())) {
+ VectorSchemaRoot newRoot = stream.getRoot();
+ while (stream.next()) {
+ newRoot.clear();
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java
new file mode 100644
index 000000000..5d76e8ae1
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.grpc;
+
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightStatusCode;
+import org.junit.Assert;
+import org.junit.Test;
+
+import io.grpc.Metadata;
+import io.grpc.Status;
+
+public class TestStatusUtils {
+
+ @Test
+ public void testParseTrailers() {
+ Status status = Status.CANCELLED;
+ Metadata trailers = new Metadata();
+
+ // gRPC can have trailers with certain metadata keys beginning with ":", such as ":status".
+ // See https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md
+ trailers.put(StatusUtils.keyOfAscii(":status"), "502");
+ trailers.put(StatusUtils.keyOfAscii("date"), "Fri, 13 Sep 2015 11:23:58 GMT");
+ trailers.put(StatusUtils.keyOfAscii("content-type"), "text/html");
+
+ CallStatus callStatus = StatusUtils.fromGrpcStatusAndTrailers(status, trailers);
+
+ Assert.assertEquals(FlightStatusCode.CANCELLED, callStatus.code());
+ Assert.assertTrue(callStatus.metadata().containsKey(":status"));
+ Assert.assertEquals("502", callStatus.metadata().get(":status"));
+ Assert.assertTrue(callStatus.metadata().containsKey("date"));
+ Assert.assertEquals("Fri, 13 Sep 2015 11:23:58 GMT", callStatus.metadata().get("date"));
+ Assert.assertTrue(callStatus.metadata().containsKey("content-type"));
+ Assert.assertEquals("text/html", callStatus.metadata().get("content-type"));
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java
new file mode 100644
index 000000000..7794ed748
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.perf;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.arrow.flight.BackpressureStrategy;
+import org.apache.arrow.flight.FlightDescriptor;
+import org.apache.arrow.flight.FlightEndpoint;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightProducer;
+import org.apache.arrow.flight.FlightServer;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.flight.NoOpFlightProducer;
+import org.apache.arrow.flight.Ticket;
+import org.apache.arrow.flight.perf.impl.PerfOuterClass.Perf;
+import org.apache.arrow.flight.perf.impl.PerfOuterClass.Token;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.google.common.collect.ImmutableList;
+import com.google.protobuf.InvalidProtocolBufferException;
+
+public class PerformanceTestServer implements AutoCloseable {
+
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PerformanceTestServer.class);
+
+ private final FlightServer flightServer;
+ private final Location location;
+ private final BufferAllocator allocator;
+ private final PerfProducer producer;
+ private final boolean isNonBlocking;
+
+ public PerformanceTestServer(BufferAllocator incomingAllocator, Location location) {
+ this(incomingAllocator, location, new BackpressureStrategy() {
+ private FlightProducer.ServerStreamListener listener;
+
+ @Override
+ public void register(FlightProducer.ServerStreamListener listener) {
+ this.listener = listener;
+ }
+
+ @Override
+ public WaitResult waitForListener(long timeout) {
+ while (!listener.isReady() && !listener.isCancelled()) {
+ // busy wait
+ }
+ return WaitResult.READY;
+ }
+ }, false);
+ }
+
+ public PerformanceTestServer(BufferAllocator incomingAllocator, Location location, BackpressureStrategy bpStrategy,
+ boolean isNonBlocking) {
+ this.allocator = incomingAllocator.newChildAllocator("perf-server", 0, Long.MAX_VALUE);
+ this.location = location;
+ this.producer = new PerfProducer(bpStrategy);
+ this.flightServer = FlightServer.builder(this.allocator, location, producer).build();
+ this.isNonBlocking = isNonBlocking;
+ }
+
+ public Location getLocation() {
+ return location;
+ }
+
+ public void start() throws IOException {
+ flightServer.start();
+ }
+
+ @Override
+ public void close() throws Exception {
+ AutoCloseables.close(flightServer, allocator);
+ }
+
+ private final class PerfProducer extends NoOpFlightProducer {
+ private final BackpressureStrategy bpStrategy;
+
+ private PerfProducer(BackpressureStrategy bpStrategy) {
+ this.bpStrategy = bpStrategy;
+ }
+
+ @Override
+ public void getStream(CallContext context, Ticket ticket,
+ ServerStreamListener listener) {
+ bpStrategy.register(listener);
+ final Runnable loadData = () -> {
+ VectorSchemaRoot root = null;
+ try {
+ Token token = Token.parseFrom(ticket.getBytes());
+ Perf perf = token.getDefinition();
+ Schema schema = Schema.deserialize(ByteBuffer.wrap(perf.getSchema().toByteArray()));
+ root = VectorSchemaRoot.create(schema, allocator);
+ BigIntVector a = (BigIntVector) root.getVector("a");
+ BigIntVector b = (BigIntVector) root.getVector("b");
+ BigIntVector c = (BigIntVector) root.getVector("c");
+ BigIntVector d = (BigIntVector) root.getVector("d");
+ listener.setUseZeroCopy(true);
+ listener.start(root);
+ root.allocateNew();
+
+ int current = 0;
+ long i = token.getStart();
+ while (i < token.getEnd()) {
+ if (listener.isCancelled()) {
+ root.clear();
+ return;
+ }
+
+ if (TestPerf.VALIDATE) {
+ a.setSafe(current, i);
+ }
+
+ i++;
+ current++;
+ if (i % perf.getRecordsPerBatch() == 0) {
+ root.setRowCount(current);
+
+ bpStrategy.waitForListener(0);
+ if (listener.isCancelled()) {
+ root.clear();
+ return;
+ }
+ listener.putNext();
+ current = 0;
+ root.allocateNew();
+ }
+ }
+
+ // send last partial batch.
+ if (current != 0) {
+ root.setRowCount(current);
+ listener.putNext();
+ }
+ listener.completed();
+ } catch (InvalidProtocolBufferException e) {
+ throw new RuntimeException(e);
+ } finally {
+ try {
+ AutoCloseables.close(root);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+
+ if (!isNonBlocking) {
+ loadData.run();
+ } else {
+ final ExecutorService service = Executors.newSingleThreadExecutor();
+ service.submit(loadData);
+ service.shutdown();
+ }
+ }
+
+ @Override
+ public FlightInfo getFlightInfo(CallContext context,
+ FlightDescriptor descriptor) {
+ try {
+ Preconditions.checkArgument(descriptor.isCommand());
+ Perf exec = Perf.parseFrom(descriptor.getCommand());
+
+ final Schema pojoSchema = new Schema(ImmutableList.of(
+ Field.nullable("a", MinorType.BIGINT.getType()),
+ Field.nullable("b", MinorType.BIGINT.getType()),
+ Field.nullable("c", MinorType.BIGINT.getType()),
+ Field.nullable("d", MinorType.BIGINT.getType())
+ ));
+
+ Token token = Token.newBuilder().setDefinition(exec)
+ .setStart(0)
+ .setEnd(exec.getRecordsPerStream())
+ .build();
+ final Ticket ticket = new Ticket(token.toByteArray());
+
+ List<FlightEndpoint> endpoints = new ArrayList<>();
+ for (int i = 0; i < exec.getStreamCount(); i++) {
+ endpoints.add(new FlightEndpoint(ticket, getLocation()));
+ }
+
+ return new FlightInfo(pojoSchema, descriptor, endpoints, -1,
+ exec.getRecordsPerStream() * exec.getStreamCount());
+ } catch (InvalidProtocolBufferException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+}
+
+
+
diff --git a/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java
new file mode 100644
index 000000000..9e2d7cc54
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.perf;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flight.FlightClient;
+import org.apache.arrow.flight.FlightDescriptor;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightStream;
+import org.apache.arrow.flight.FlightTestUtil;
+import org.apache.arrow.flight.Ticket;
+import org.apache.arrow.flight.perf.impl.PerfOuterClass.Perf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+import com.google.common.base.MoreObjects;
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.ImmutableList;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
+import com.google.protobuf.ByteString;
+
+@org.junit.Ignore
+public class TestPerf {
+
+ public static final boolean VALIDATE = false;
+
+ public static FlightDescriptor getPerfFlightDescriptor(long recordCount, int recordsPerBatch, int streamCount) {
+ final Schema pojoSchema = new Schema(ImmutableList.of(
+ Field.nullable("a", MinorType.BIGINT.getType()),
+ Field.nullable("b", MinorType.BIGINT.getType()),
+ Field.nullable("c", MinorType.BIGINT.getType()),
+ Field.nullable("d", MinorType.BIGINT.getType())
+ ));
+
+ ByteString serializedSchema = ByteString.copyFrom(pojoSchema.toByteArray());
+
+ return FlightDescriptor.command(Perf.newBuilder()
+ .setRecordsPerStream(recordCount)
+ .setRecordsPerBatch(recordsPerBatch)
+ .setSchema(serializedSchema)
+ .setStreamCount(streamCount)
+ .build()
+ .toByteArray());
+ }
+
+ public static void main(String[] args) throws Exception {
+ new TestPerf().throughput();
+ }
+
+ @Test
+ public void throughput() throws Exception {
+ final int numRuns = 10;
+ ListeningExecutorService pool = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4));
+ double [] throughPuts = new double[numRuns];
+
+ for (int i = 0; i < numRuns; i++) {
+ try (
+ final BufferAllocator a = new RootAllocator(Long.MAX_VALUE);
+ final PerformanceTestServer server =
+ FlightTestUtil.getStartedServer((location) -> new PerformanceTestServer(a, location));
+ final FlightClient client = FlightClient.builder(a, server.getLocation()).build();
+ ) {
+ final FlightInfo info = client.getInfo(getPerfFlightDescriptor(50_000_000L, 4095, 2));
+ List<ListenableFuture<Result>> results = info.getEndpoints()
+ .stream()
+ .map(t -> new Consumer(client, t.getTicket()))
+ .map(t -> pool.submit(t))
+ .collect(Collectors.toList());
+
+ final Result r = Futures.whenAllSucceed(results).call(() -> {
+ Result res = new Result();
+ for (ListenableFuture<Result> f : results) {
+ res.add(f.get());
+ }
+ return res;
+ }, pool).get();
+
+ double seconds = r.nanos * 1.0d / 1000 / 1000 / 1000;
+ throughPuts[i] = (r.bytes * 1.0d / 1024 / 1024) / seconds;
+ System.out.println(String.format(
+ "Transferred %d records totaling %s bytes at %f MiB/s. %f record/s. %f batch/s.",
+ r.rows,
+ r.bytes,
+ throughPuts[i],
+ (r.rows * 1.0d) / seconds,
+ (r.batches * 1.0d) / seconds
+ ));
+ }
+ }
+ pool.shutdown();
+
+ System.out.println("Summary: ");
+ double average = Arrays.stream(throughPuts).sum() / numRuns;
+ double sqrSum = Arrays.stream(throughPuts).map(val -> val - average).map(val -> val * val).sum();
+ double stddev = Math.sqrt(sqrSum / numRuns);
+ System.out.println(String.format("Average throughput: %f MiB/s, standard deviation: %f MiB/s",
+ average, stddev));
+ }
+
+ private final class Consumer implements Callable<Result> {
+
+ private final FlightClient client;
+ private final Ticket ticket;
+
+ public Consumer(FlightClient client, Ticket ticket) {
+ super();
+ this.client = client;
+ this.ticket = ticket;
+ }
+
+ @Override
+ public Result call() throws Exception {
+ final Result r = new Result();
+ Stopwatch watch = Stopwatch.createStarted();
+ try (final FlightStream stream = client.getStream(ticket)) {
+ final VectorSchemaRoot root = stream.getRoot();
+ try {
+ BigIntVector a = (BigIntVector) root.getVector("a");
+ while (stream.next()) {
+ int rows = root.getRowCount();
+ long aSum = r.aSum;
+ for (int i = 0; i < rows; i++) {
+ if (VALIDATE) {
+ aSum += a.get(i);
+ }
+ }
+ r.bytes += rows * 32;
+ r.rows += rows;
+ r.aSum = aSum;
+ r.batches++;
+ }
+
+ r.nanos = watch.elapsed(TimeUnit.NANOSECONDS);
+ return r;
+ } finally {
+ root.clear();
+ }
+ }
+ }
+
+ }
+
+ private final class Result {
+ private long rows;
+ private long aSum;
+ private long bytes;
+ private long nanos;
+ private long batches;
+
+ public void add(Result r) {
+ rows += r.rows;
+ aSum += r.aSum;
+ bytes += r.bytes;
+ batches += r.batches;
+ nanos = Math.max(nanos, r.nanos);
+ }
+
+ @Override
+ public String toString() {
+ return MoreObjects.toStringHelper(this)
+ .add("rows", rows)
+ .add("aSum", aSum)
+ .add("batches", batches)
+ .add("bytes", bytes)
+ .add("nanos", nanos)
+ .toString();
+ }
+ }
+}
diff --git a/src/arrow/java/flight/flight-core/src/test/protobuf/perf.proto b/src/arrow/java/flight/flight-core/src/test/protobuf/perf.proto
new file mode 100644
index 000000000..99f35a9e6
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/protobuf/perf.proto
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+option java_package = "org.apache.arrow.flight.perf.impl";
+
+message Perf {
+ bytes schema = 1;
+ int32 stream_count = 2;
+ int64 records_per_stream = 3;
+ int32 records_per_batch = 4;
+}
+
+/*
+ * Payload of ticket
+ */
+message Token {
+
+ // definition of entire flight.
+ Perf definition = 1;
+
+ // inclusive start
+ int64 start = 2;
+
+ // exclusive end
+ int64 end = 3;
+
+}
+
diff --git a/src/arrow/java/flight/flight-core/src/test/resources/logback.xml b/src/arrow/java/flight/flight-core/src/test/resources/logback.xml
new file mode 100644
index 000000000..444b2ed6d
--- /dev/null
+++ b/src/arrow/java/flight/flight-core/src/test/resources/logback.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<configuration>
+ <statusListener class="ch.qos.logback.core.status.NopStatusListener"/>
+ <appender name="SOCKET"
+ class="de.huxhorn.lilith.logback.appender.ClassicMultiplexSocketAppender">
+ <Compressing>true</Compressing>
+ <ReconnectionDelay>10000</ReconnectionDelay>
+ <IncludeCallerData>true</IncludeCallerData>
+ <RemoteHosts>${LILITH_HOSTNAME:-localhost}</RemoteHosts>
+ </appender>
+
+ <logger name="org.apache.arrow" additivity="false">
+ <level value="info" />
+ <appender-ref ref="FILE" />
+ </logger>
+
+</configuration>
diff --git a/src/arrow/java/flight/flight-grpc/pom.xml b/src/arrow/java/flight/flight-grpc/pom.xml
new file mode 100644
index 000000000..1968484a1
--- /dev/null
+++ b/src/arrow/java/flight/flight-grpc/pom.xml
@@ -0,0 +1,132 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>arrow-java-root</artifactId>
+ <groupId>org.apache.arrow</groupId>
+ <version>6.0.1</version>
+ <relativePath>../../pom.xml</relativePath>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>flight-grpc</artifactId>
+ <name>Arrow Flight GRPC</name>
+ <description>(Experimental)Contains utility class to expose Flight gRPC service and client</description>
+ <packaging>jar</packaging>
+
+ <properties>
+ <dep.grpc.version>1.41.0</dep.grpc.version>
+ <dep.protobuf.version>3.7.1</dep.protobuf.version>
+ <forkCount>1</forkCount>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>flight-core</artifactId>
+ <version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-unix-common</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-kqueue</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-epoll</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-core</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-stub</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-protobuf</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ <version>${dep.protobuf.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.grpc</groupId>
+ <artifactId>grpc-api</artifactId>
+ <version>${dep.grpc.version}</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <extensions>
+ <!-- provides os.detected.classifier (i.e. linux-x86_64, osx-x86_64) property -->
+ <extension>
+ <groupId>kr.motd.maven</groupId>
+ <artifactId>os-maven-plugin</artifactId>
+ <version>1.5.0.Final</version>
+ </extension>
+ </extensions>
+ <plugins>
+ <plugin>
+ <groupId>org.xolstice.maven.plugins</groupId>
+ <artifactId>protobuf-maven-plugin</artifactId>
+ <version>0.5.0</version>
+ <configuration>
+ <protocArtifact>com.google.protobuf:protoc:${dep.protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
+ <clearOutputDirectory>false</clearOutputDirectory>
+ <pluginId>grpc-java</pluginId>
+ <pluginArtifact>io.grpc:protoc-gen-grpc-java:${dep.grpc.version}:exe:${os.detected.classifier}</pluginArtifact>
+ </configuration>
+ <executions>
+ <execution>
+ <id>test</id>
+ <configuration>
+ <protoSourceRoot>${basedir}/src/test/protobuf</protoSourceRoot>
+ <outputDirectory>${project.build.directory}/generated-test-sources//protobuf</outputDirectory>
+ </configuration>
+ <goals>
+ <goal>compile</goal>
+ <goal>compile-custom</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
diff --git a/src/arrow/java/flight/flight-grpc/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java b/src/arrow/java/flight/flight-grpc/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java
new file mode 100644
index 000000000..eb5e492b4
--- /dev/null
+++ b/src/arrow/java/flight/flight-grpc/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import java.util.Collections;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.flight.auth.ServerAuthHandler;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.VisibleForTesting;
+
+import io.grpc.BindableService;
+import io.grpc.CallOptions;
+import io.grpc.ClientCall;
+import io.grpc.ConnectivityState;
+import io.grpc.ManagedChannel;
+import io.grpc.MethodDescriptor;
+
+/**
+ * Exposes Flight GRPC service & client.
+ */
+public class FlightGrpcUtils {
+ /**
+ * Proxy class for ManagedChannel that makes closure a no-op.
+ */
+ @VisibleForTesting
+ static class NonClosingProxyManagedChannel extends ManagedChannel {
+ private final ManagedChannel channel;
+ private boolean isShutdown;
+
+ NonClosingProxyManagedChannel(ManagedChannel channel) {
+ this.channel = channel;
+ this.isShutdown = channel.isShutdown();
+ }
+
+ @Override
+ public ManagedChannel shutdown() {
+ isShutdown = true;
+ return this;
+ }
+
+ @Override
+ public boolean isShutdown() {
+ if (this.channel.isShutdown()) {
+ // If the underlying channel is shut down, ensure we're updated to match.
+ shutdown();
+ }
+ return isShutdown;
+ }
+
+ @Override
+ public boolean isTerminated() {
+ return this.isShutdown();
+ }
+
+ @Override
+ public ManagedChannel shutdownNow() {
+ return shutdown();
+ }
+
+ @Override
+ public boolean awaitTermination(long l, TimeUnit timeUnit) {
+ // Don't actually await termination, since it'll be a no-op, so simply return whether or not
+ // the channel has been shut down already.
+ return this.isShutdown();
+ }
+
+ @Override
+ public <RequestT, ResponseT> ClientCall<RequestT, ResponseT> newCall(
+ MethodDescriptor<RequestT, ResponseT> methodDescriptor, CallOptions callOptions) {
+ if (this.isShutdown()) {
+ throw new IllegalStateException("Channel has been shut down.");
+ }
+
+ return this.channel.newCall(methodDescriptor, callOptions);
+ }
+
+ @Override
+ public String authority() {
+ return this.channel.authority();
+ }
+
+ @Override
+ public ConnectivityState getState(boolean requestConnection) {
+ if (this.isShutdown()) {
+ return ConnectivityState.SHUTDOWN;
+ }
+
+ return this.channel.getState(requestConnection);
+ }
+
+ @Override
+ public void notifyWhenStateChanged(ConnectivityState source, Runnable callback) {
+ // The proxy has no insight into the underlying channel state changes, so we'll have to leak the abstraction
+ // a bit here and simply pass to the underlying channel, even though it will never transition to shutdown via
+ // the proxy. This should be fine, since it's mainly targeted at the FlightClient and there's no getter for
+ // the channel.
+ this.channel.notifyWhenStateChanged(source, callback);
+ }
+
+ @Override
+ public void resetConnectBackoff() {
+ this.channel.resetConnectBackoff();
+ }
+
+ @Override
+ public void enterIdle() {
+ this.channel.enterIdle();
+ }
+ }
+
+ private FlightGrpcUtils() {}
+
+ /**
+ * Creates a Flight service.
+ * @param allocator Memory allocator
+ * @param producer Specifies the service api
+ * @param authHandler Authentication handler
+ * @param executor Executor service
+ * @return FlightBindingService
+ */
+ public static BindableService createFlightService(BufferAllocator allocator, FlightProducer producer,
+ ServerAuthHandler authHandler, ExecutorService executor) {
+ return new FlightBindingService(allocator, producer, authHandler, executor);
+ }
+
+ /**
+ * Creates a Flight client.
+ * @param incomingAllocator Memory allocator
+ * @param channel provides a connection to a gRPC server.
+ */
+ public static FlightClient createFlightClient(BufferAllocator incomingAllocator, ManagedChannel channel) {
+ return new FlightClient(incomingAllocator, channel, Collections.emptyList());
+ }
+
+ /**
+ * Creates a Flight client.
+ * @param incomingAllocator Memory allocator
+ * @param channel provides a connection to a gRPC server. Will not be closed on closure of the returned FlightClient.
+ */
+ public static FlightClient createFlightClientWithSharedChannel(
+ BufferAllocator incomingAllocator, ManagedChannel channel) {
+ return new FlightClient(incomingAllocator, new NonClosingProxyManagedChannel(channel), Collections.emptyList());
+ }
+}
diff --git a/src/arrow/java/flight/flight-grpc/src/test/java/org/apache/arrow/flight/TestFlightGrpcUtils.java b/src/arrow/java/flight/flight-grpc/src/test/java/org/apache/arrow/flight/TestFlightGrpcUtils.java
new file mode 100644
index 000000000..142a0f937
--- /dev/null
+++ b/src/arrow/java/flight/flight-grpc/src/test/java/org/apache/arrow/flight/TestFlightGrpcUtils.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.arrow.flight.auth.ServerAuthHandler;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.protobuf.Empty;
+
+import io.grpc.BindableService;
+import io.grpc.ConnectivityState;
+import io.grpc.ManagedChannel;
+import io.grpc.Server;
+import io.grpc.inprocess.InProcessChannelBuilder;
+import io.grpc.inprocess.InProcessServerBuilder;
+import io.grpc.stub.StreamObserver;
+
+/**
+ * Unit test which adds 2 services to same server end point.
+ */
+public class TestFlightGrpcUtils {
+ private Server server;
+ private BufferAllocator allocator;
+ private String serverName;
+
+ @Before
+ public void setup() throws IOException {
+ //Defines flight service
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ final NoOpFlightProducer producer = new NoOpFlightProducer();
+ final ServerAuthHandler authHandler = ServerAuthHandler.NO_OP;
+ final ExecutorService exec = Executors.newCachedThreadPool();
+ final BindableService flightBindingService = FlightGrpcUtils.createFlightService(allocator, producer,
+ authHandler, exec);
+
+ //initializes server with 2 services - FlightBindingService & TestService
+ serverName = InProcessServerBuilder.generateName();
+ server = InProcessServerBuilder.forName(serverName)
+ .directExecutor()
+ .addService(flightBindingService)
+ .addService(new TestServiceAdapter())
+ .build();
+ server.start();
+ }
+
+ @After
+ public void cleanup() {
+ server.shutdownNow();
+ }
+
+ /**
+ * This test checks if multiple gRPC services can be added to the same
+ * server endpoint and if they can be used by different clients via the same channel.
+ * @throws IOException If server fails to start.
+ */
+ @Test
+ public void testMultipleGrpcServices() throws IOException {
+ //Initializes channel so that multiple clients can communicate with server
+ final ManagedChannel managedChannel = InProcessChannelBuilder.forName(serverName)
+ .directExecutor()
+ .build();
+
+ //Defines flight client and calls service method. Since we use a NoOpFlightProducer we expect the service
+ //to throw a RunTimeException
+ final FlightClient flightClient = FlightGrpcUtils.createFlightClient(allocator, managedChannel);
+ final Iterable<ActionType> actionTypes = flightClient.listActions();
+ assertThrows(FlightRuntimeException.class, () -> actionTypes.forEach(
+ actionType -> System.out.println(actionType.toString())));
+
+ //Define Test client as a blocking stub and call test method which correctly returns an empty protobuf object
+ final TestServiceGrpc.TestServiceBlockingStub blockingStub = TestServiceGrpc.newBlockingStub(managedChannel);
+ Assert.assertEquals(Empty.newBuilder().build(), blockingStub.test(Empty.newBuilder().build()));
+ }
+
+ @Test
+ public void testShutdown() throws IOException, InterruptedException {
+ //Initializes channel so that multiple clients can communicate with server
+ final ManagedChannel managedChannel = InProcessChannelBuilder.forName(serverName)
+ .directExecutor()
+ .build();
+
+ //Defines flight client and calls service method. Since we use a NoOpFlightProducer we expect the service
+ //to throw a RunTimeException
+ final FlightClient flightClient = FlightGrpcUtils.createFlightClientWithSharedChannel(allocator, managedChannel);
+
+ // Should be a no-op.
+ flightClient.close();
+ Assert.assertFalse(managedChannel.isShutdown());
+ Assert.assertFalse(managedChannel.isTerminated());
+ Assert.assertEquals(ConnectivityState.IDLE, managedChannel.getState(false));
+ managedChannel.shutdownNow();
+ }
+
+ @Test
+ public void testProxyChannel() throws IOException, InterruptedException {
+ //Initializes channel so that multiple clients can communicate with server
+ final ManagedChannel managedChannel = InProcessChannelBuilder.forName(serverName)
+ .directExecutor()
+ .build();
+
+ final FlightGrpcUtils.NonClosingProxyManagedChannel proxyChannel =
+ new FlightGrpcUtils.NonClosingProxyManagedChannel(managedChannel);
+ Assert.assertFalse(proxyChannel.isShutdown());
+ Assert.assertFalse(proxyChannel.isTerminated());
+ proxyChannel.shutdown();
+ Assert.assertTrue(proxyChannel.isShutdown());
+ Assert.assertTrue(proxyChannel.isTerminated());
+ Assert.assertEquals(ConnectivityState.SHUTDOWN, proxyChannel.getState(false));
+ try {
+ proxyChannel.newCall(null, null);
+ Assert.fail();
+ } catch (IllegalStateException e) {
+ // This is expected, since the proxy channel is shut down.
+ }
+
+ Assert.assertFalse(managedChannel.isShutdown());
+ Assert.assertFalse(managedChannel.isTerminated());
+ Assert.assertEquals(ConnectivityState.IDLE, managedChannel.getState(false));
+
+ managedChannel.shutdownNow();
+ }
+
+ @Test
+ public void testProxyChannelWithClosedChannel() throws IOException, InterruptedException {
+ //Initializes channel so that multiple clients can communicate with server
+ final ManagedChannel managedChannel = InProcessChannelBuilder.forName(serverName)
+ .directExecutor()
+ .build();
+
+ final FlightGrpcUtils.NonClosingProxyManagedChannel proxyChannel =
+ new FlightGrpcUtils.NonClosingProxyManagedChannel(managedChannel);
+ Assert.assertFalse(proxyChannel.isShutdown());
+ Assert.assertFalse(proxyChannel.isTerminated());
+ managedChannel.shutdownNow();
+ Assert.assertTrue(proxyChannel.isShutdown());
+ Assert.assertTrue(proxyChannel.isTerminated());
+ Assert.assertEquals(ConnectivityState.SHUTDOWN, proxyChannel.getState(false));
+ try {
+ proxyChannel.newCall(null, null);
+ Assert.fail();
+ } catch (IllegalStateException e) {
+ // This is expected, since the proxy channel is shut down.
+ }
+
+ Assert.assertTrue(managedChannel.isShutdown());
+ Assert.assertTrue(managedChannel.isTerminated());
+ Assert.assertEquals(ConnectivityState.SHUTDOWN, managedChannel.getState(false));
+ }
+
+ /**
+ * Private class used for testing purposes that overrides service behavior.
+ */
+ private class TestServiceAdapter extends TestServiceGrpc.TestServiceImplBase {
+
+ /**
+ * gRPC service that receives an empty object & returns and empty protobuf object.
+ * @param request google.protobuf.Empty
+ * @param responseObserver google.protobuf.Empty
+ */
+ @Override
+ public void test(Empty request, StreamObserver<Empty> responseObserver) {
+ responseObserver.onNext(Empty.newBuilder().build());
+ responseObserver.onCompleted();
+ }
+ }
+}
+
diff --git a/src/arrow/java/flight/flight-grpc/src/test/protobuf/test.proto b/src/arrow/java/flight/flight-grpc/src/test/protobuf/test.proto
new file mode 100644
index 000000000..6fa1890b2
--- /dev/null
+++ b/src/arrow/java/flight/flight-grpc/src/test/protobuf/test.proto
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+option java_package = "org.apache.arrow.flight";
+
+import "google/protobuf/empty.proto";
+
+service TestService {
+ rpc Test(google.protobuf.Empty) returns (google.protobuf.Empty) {}
+}
diff --git a/src/arrow/java/format/pom.xml b/src/arrow/java/format/pom.xml
new file mode 100644
index 000000000..067bed358
--- /dev/null
+++ b/src/arrow/java/format/pom.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<modelVersion>4.0.0</modelVersion>
+
+<parent>
+ <artifactId>arrow-java-root</artifactId>
+ <groupId>org.apache.arrow</groupId>
+ <version>6.0.1</version>
+</parent>
+
+<artifactId>arrow-format</artifactId>
+<packaging>jar</packaging>
+<name>Arrow Format</name>
+<description>Generated Java files from the IPC Flatbuffer definitions.</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>com.google.flatbuffers</groupId>
+ <artifactId>flatbuffers-java</artifactId>
+ </dependency>
+ </dependencies>
+
+ <build>
+
+ <plugins>
+ <plugin> <!-- no checkstyle on the generated code -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <configuration>
+ <skip>true</skip>
+ </configuration>
+ </plugin>
+ </plugins>
+
+</build>
+</project>
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java
new file mode 100644
index 000000000..f2ea52502
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Opaque binary data
+ */
+public final class Binary extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Binary getRootAsBinary(ByteBuffer _bb) { return getRootAsBinary(_bb, new Binary()); }
+ public static Binary getRootAsBinary(ByteBuffer _bb, Binary obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Binary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+ public static void startBinary(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endBinary(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Binary get(int j) { return get(new Binary(), j); }
+ public Binary get(Binary obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java
new file mode 100644
index 000000000..e1435f832
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Block extends Struct {
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Block __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * Index to the start of the RecordBlock (note this is past the Message header)
+ */
+ public long offset() { return bb.getLong(bb_pos + 0); }
+ /**
+ * Length of the metadata
+ */
+ public int metaDataLength() { return bb.getInt(bb_pos + 8); }
+ /**
+ * Length of the data (this is aligned so there can be a gap between this and
+ * the metadata).
+ */
+ public long bodyLength() { return bb.getLong(bb_pos + 16); }
+
+ public static int createBlock(FlatBufferBuilder builder, long offset, int metaDataLength, long bodyLength) {
+ builder.prep(8, 24);
+ builder.putLong(bodyLength);
+ builder.pad(4);
+ builder.putInt(metaDataLength);
+ builder.putLong(offset);
+ return builder.offset();
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Block get(int j) { return get(new Block(), j); }
+ public Block get(Block obj, int j) { return obj.__assign(__element(j), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
new file mode 100644
index 000000000..650454eb1
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Optional compression for the memory buffers constituting IPC message
+ * bodies. Intended for use with RecordBatch but could be used for other
+ * message types
+ */
+public final class BodyCompression extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static BodyCompression getRootAsBodyCompression(ByteBuffer _bb) { return getRootAsBodyCompression(_bb, new BodyCompression()); }
+ public static BodyCompression getRootAsBodyCompression(ByteBuffer _bb, BodyCompression obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public BodyCompression __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * Compressor library
+ */
+ public byte codec() { int o = __offset(4); return o != 0 ? bb.get(o + bb_pos) : 0; }
+ /**
+ * Indicates the way the record batch body was compressed
+ */
+ public byte method() { int o = __offset(6); return o != 0 ? bb.get(o + bb_pos) : 0; }
+
+ public static int createBodyCompression(FlatBufferBuilder builder,
+ byte codec,
+ byte method) {
+ builder.startTable(2);
+ BodyCompression.addMethod(builder, method);
+ BodyCompression.addCodec(builder, codec);
+ return BodyCompression.endBodyCompression(builder);
+ }
+
+ public static void startBodyCompression(FlatBufferBuilder builder) { builder.startTable(2); }
+ public static void addCodec(FlatBufferBuilder builder, byte codec) { builder.addByte(0, codec, 0); }
+ public static void addMethod(FlatBufferBuilder builder, byte method) { builder.addByte(1, method, 0); }
+ public static int endBodyCompression(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public BodyCompression get(int j) { return get(new BodyCompression(), j); }
+ public BodyCompression get(BodyCompression obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java
new file mode 100644
index 000000000..48cff16e7
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * Provided for forward compatibility in case we need to support different
+ * strategies for compressing the IPC message body (like whole-body
+ * compression rather than buffer-level) in the future
+ */
+public final class BodyCompressionMethod {
+ private BodyCompressionMethod() { }
+ /**
+ * Each constituent buffer is first compressed with the indicated
+ * compressor, and then written with the uncompressed length in the first 8
+ * bytes as a 64-bit little-endian signed integer followed by the compressed
+ * buffer bytes (and then padding as required by the protocol). The
+ * uncompressed length may be set to -1 to indicate that the data that
+ * follows is not compressed, which can be useful for cases where
+ * compression does not yield appreciable savings.
+ */
+ public static final byte BUFFER = 0;
+
+ public static final String[] names = { "BUFFER", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java
new file mode 100644
index 000000000..e6b54e4b7
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Bool extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Bool getRootAsBool(ByteBuffer _bb) { return getRootAsBool(_bb, new Bool()); }
+ public static Bool getRootAsBool(ByteBuffer _bb, Bool obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Bool __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+ public static void startBool(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endBool(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Bool get(int j) { return get(new Bool(), j); }
+ public Bool get(Bool obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java
new file mode 100644
index 000000000..589ed0b71
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * A Buffer represents a single contiguous memory segment
+ */
+public final class Buffer extends Struct {
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Buffer __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * The relative offset into the shared memory page where the bytes for this
+ * buffer starts
+ */
+ public long offset() { return bb.getLong(bb_pos + 0); }
+ /**
+ * The absolute length (in bytes) of the memory buffer. The memory is found
+ * from offset (inclusive) to offset + length (non-inclusive). When building
+ * messages using the encapsulated IPC message, padding bytes may be written
+ * after a buffer, but such padding bytes do not need to be accounted for in
+ * the size here.
+ */
+ public long length() { return bb.getLong(bb_pos + 8); }
+
+ public static int createBuffer(FlatBufferBuilder builder, long offset, long length) {
+ builder.prep(8, 16);
+ builder.putLong(length);
+ builder.putLong(offset);
+ return builder.offset();
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Buffer get(int j) { return get(new Buffer(), j); }
+ public Buffer get(Buffer obj, int j) { return obj.__assign(__element(j), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java
new file mode 100644
index 000000000..0597ffd30
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class CompressionType {
+ private CompressionType() { }
+ public static final byte LZ4_FRAME = 0;
+ public static final byte ZSTD = 1;
+
+ public static final String[] names = { "LZ4_FRAME", "ZSTD", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
new file mode 100644
index 000000000..b2fcc9e39
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
+ * epoch (1970-01-01), stored in either of two units:
+ *
+ * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
+ * leap seconds), where the values are evenly divisible by 86400000
+ * * Days (32 bits) since the UNIX epoch
+ */
+public final class Date extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb, new Date()); }
+ public static Date getRootAsDate(ByteBuffer _bb, Date obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 1; }
+
+ public static int createDate(FlatBufferBuilder builder,
+ short unit) {
+ builder.startTable(1);
+ Date.addUnit(builder, unit);
+ return Date.endDate(builder);
+ }
+
+ public static void startDate(FlatBufferBuilder builder) { builder.startTable(1); }
+ public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); }
+ public static int endDate(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Date get(int j) { return get(new Date(), j); }
+ public Date get(Date obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java
new file mode 100644
index 000000000..f2c96f45b
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class DateUnit {
+ private DateUnit() { }
+ public static final short DAY = 0;
+ public static final short MILLISECOND = 1;
+
+ public static final String[] names = { "DAY", "MILLISECOND", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java
new file mode 100644
index 000000000..8ffaa1ebb
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Exact decimal value represented as an integer value in two's
+ * complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
+ * are used. The representation uses the endianness indicated
+ * in the Schema.
+ */
+public final class Decimal extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Decimal getRootAsDecimal(ByteBuffer _bb) { return getRootAsDecimal(_bb, new Decimal()); }
+ public static Decimal getRootAsDecimal(ByteBuffer _bb, Decimal obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Decimal __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * Total number of decimal digits
+ */
+ public int precision() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+ /**
+ * Number of digits after the decimal point "."
+ */
+ public int scale() { int o = __offset(6); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+ /**
+ * Number of bits per value. The only accepted widths are 128 and 256.
+ * We use bitWidth for consistency with Int::bitWidth.
+ */
+ public int bitWidth() { int o = __offset(8); return o != 0 ? bb.getInt(o + bb_pos) : 128; }
+
+ public static int createDecimal(FlatBufferBuilder builder,
+ int precision,
+ int scale,
+ int bitWidth) {
+ builder.startTable(3);
+ Decimal.addBitWidth(builder, bitWidth);
+ Decimal.addScale(builder, scale);
+ Decimal.addPrecision(builder, precision);
+ return Decimal.endDecimal(builder);
+ }
+
+ public static void startDecimal(FlatBufferBuilder builder) { builder.startTable(3); }
+ public static void addPrecision(FlatBufferBuilder builder, int precision) { builder.addInt(0, precision, 0); }
+ public static void addScale(FlatBufferBuilder builder, int scale) { builder.addInt(1, scale, 0); }
+ public static void addBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.addInt(2, bitWidth, 128); }
+ public static int endDecimal(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Decimal get(int j) { return get(new Decimal(), j); }
+ public Decimal get(Decimal obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java
new file mode 100644
index 000000000..fe6c59fb5
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * For sending dictionary encoding information. Any Field can be
+ * dictionary-encoded, but in this case none of its children may be
+ * dictionary-encoded.
+ * There is one vector / column per dictionary, but that vector / column
+ * may be spread across multiple dictionary batches by using the isDelta
+ * flag
+ */
+public final class DictionaryBatch extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static DictionaryBatch getRootAsDictionaryBatch(ByteBuffer _bb) { return getRootAsDictionaryBatch(_bb, new DictionaryBatch()); }
+ public static DictionaryBatch getRootAsDictionaryBatch(ByteBuffer _bb, DictionaryBatch obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public DictionaryBatch __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public long id() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+ public org.apache.arrow.flatbuf.RecordBatch data() { return data(new org.apache.arrow.flatbuf.RecordBatch()); }
+ public org.apache.arrow.flatbuf.RecordBatch data(org.apache.arrow.flatbuf.RecordBatch obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ /**
+ * If isDelta is true the values in the dictionary are to be appended to a
+ * dictionary with the indicated id. If isDelta is false this dictionary
+ * should replace the existing dictionary.
+ */
+ public boolean isDelta() { int o = __offset(8); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+
+ public static int createDictionaryBatch(FlatBufferBuilder builder,
+ long id,
+ int dataOffset,
+ boolean isDelta) {
+ builder.startTable(3);
+ DictionaryBatch.addId(builder, id);
+ DictionaryBatch.addData(builder, dataOffset);
+ DictionaryBatch.addIsDelta(builder, isDelta);
+ return DictionaryBatch.endDictionaryBatch(builder);
+ }
+
+ public static void startDictionaryBatch(FlatBufferBuilder builder) { builder.startTable(3); }
+ public static void addId(FlatBufferBuilder builder, long id) { builder.addLong(0, id, 0L); }
+ public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addOffset(1, dataOffset, 0); }
+ public static void addIsDelta(FlatBufferBuilder builder, boolean isDelta) { builder.addBoolean(2, isDelta, false); }
+ public static int endDictionaryBatch(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public DictionaryBatch get(int j) { return get(new DictionaryBatch(), j); }
+ public DictionaryBatch get(DictionaryBatch obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java
new file mode 100644
index 000000000..8b2bb73e7
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class DictionaryEncoding extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static DictionaryEncoding getRootAsDictionaryEncoding(ByteBuffer _bb) { return getRootAsDictionaryEncoding(_bb, new DictionaryEncoding()); }
+ public static DictionaryEncoding getRootAsDictionaryEncoding(ByteBuffer _bb, DictionaryEncoding obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public DictionaryEncoding __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * The known dictionary id in the application where this data is used. In
+ * the file or streaming formats, the dictionary ids are found in the
+ * DictionaryBatch messages
+ */
+ public long id() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+ /**
+ * The dictionary indices are constrained to be non-negative integers. If
+ * this field is null, the indices must be signed int32. To maximize
+ * cross-language compatibility and performance, implementations are
+ * recommended to prefer signed integer types over unsigned integer types
+ * and to avoid uint64 indices unless they are required by an application.
+ */
+ public org.apache.arrow.flatbuf.Int indexType() { return indexType(new org.apache.arrow.flatbuf.Int()); }
+ public org.apache.arrow.flatbuf.Int indexType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ /**
+ * By default, dictionaries are not ordered, or the order does not have
+ * semantic meaning. In some statistical, applications, dictionary-encoding
+ * is used to represent ordered categorical data, and we provide a way to
+ * preserve that metadata here
+ */
+ public boolean isOrdered() { int o = __offset(8); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+ public short dictionaryKind() { int o = __offset(10); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+
+ public static int createDictionaryEncoding(FlatBufferBuilder builder,
+ long id,
+ int indexTypeOffset,
+ boolean isOrdered,
+ short dictionaryKind) {
+ builder.startTable(4);
+ DictionaryEncoding.addId(builder, id);
+ DictionaryEncoding.addIndexType(builder, indexTypeOffset);
+ DictionaryEncoding.addDictionaryKind(builder, dictionaryKind);
+ DictionaryEncoding.addIsOrdered(builder, isOrdered);
+ return DictionaryEncoding.endDictionaryEncoding(builder);
+ }
+
+ public static void startDictionaryEncoding(FlatBufferBuilder builder) { builder.startTable(4); }
+ public static void addId(FlatBufferBuilder builder, long id) { builder.addLong(0, id, 0L); }
+ public static void addIndexType(FlatBufferBuilder builder, int indexTypeOffset) { builder.addOffset(1, indexTypeOffset, 0); }
+ public static void addIsOrdered(FlatBufferBuilder builder, boolean isOrdered) { builder.addBoolean(2, isOrdered, false); }
+ public static void addDictionaryKind(FlatBufferBuilder builder, short dictionaryKind) { builder.addShort(3, dictionaryKind, 0); }
+ public static int endDictionaryEncoding(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public DictionaryEncoding get(int j) { return get(new DictionaryEncoding(), j); }
+ public DictionaryEncoding get(DictionaryEncoding obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java
new file mode 100644
index 000000000..ecefa4b76
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * ----------------------------------------------------------------------
+ * Dictionary encoding metadata
+ * Maintained for forwards compatibility, in the future
+ * Dictionaries might be explicit maps between integers and values
+ * allowing for non-contiguous index values
+ */
+public final class DictionaryKind {
+ private DictionaryKind() { }
+ public static final short DenseArray = 0;
+
+ public static final String[] names = { "DenseArray", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java
new file mode 100644
index 000000000..e1495f300
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Duration extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Duration getRootAsDuration(ByteBuffer _bb) { return getRootAsDuration(_bb, new Duration()); }
+ public static Duration getRootAsDuration(ByteBuffer _bb, Duration obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Duration __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 1; }
+
+ public static int createDuration(FlatBufferBuilder builder,
+ short unit) {
+ builder.startTable(1);
+ Duration.addUnit(builder, unit);
+ return Duration.endDuration(builder);
+ }
+
+ public static void startDuration(FlatBufferBuilder builder) { builder.startTable(1); }
+ public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); }
+ public static int endDuration(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Duration get(int j) { return get(new Duration(), j); }
+ public Duration get(Duration obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java
new file mode 100644
index 000000000..494a3dcf5
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * ----------------------------------------------------------------------
+ * Endianness of the platform producing the data
+ */
+public final class Endianness {
+ private Endianness() { }
+ public static final short Little = 0;
+ public static final short Big = 1;
+
+ public static final String[] names = { "Little", "Big", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java
new file mode 100644
index 000000000..a4fa84c37
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * Represents Arrow Features that might not have full support
+ * within implementations. This is intended to be used in
+ * two scenarios:
+ * 1. A mechanism for readers of Arrow Streams
+ * and files to understand that the stream or file makes
+ * use of a feature that isn't supported or unknown to
+ * the implementation (and therefore can meet the Arrow
+ * forward compatibility guarantees).
+ * 2. A means of negotiating between a client and server
+ * what features a stream is allowed to use. The enums
+ * values here are intented to represent higher level
+ * features, additional details maybe negotiated
+ * with key-value pairs specific to the protocol.
+ *
+ * Enums added to this list should be assigned power-of-two values
+ * to facilitate exchanging and comparing bitmaps for supported
+ * features.
+ */
+public final class Feature {
+ private Feature() { }
+ /**
+ * Needed to make flatbuffers happy.
+ */
+ public static final long UNUSED = 0;
+ /**
+ * The stream makes use of multiple full dictionaries with the
+ * same ID and assumes clients implement dictionary replacement
+ * correctly.
+ */
+ public static final long DICTIONARY_REPLACEMENT = 1;
+ /**
+ * The stream makes use of compressed bodies as described
+ * in Message.fbs.
+ */
+ public static final long COMPRESSED_BODY = 2;
+
+ public static final String[] names = { "UNUSED", "DICTIONARY_REPLACEMENT", "COMPRESSED_BODY", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java
new file mode 100644
index 000000000..d34501e0a
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * A field represents a named column in a record / row batch or child of a
+ * nested type.
+ */
+public final class Field extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Field getRootAsField(ByteBuffer _bb) { return getRootAsField(_bb, new Field()); }
+ public static Field getRootAsField(ByteBuffer _bb, Field obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Field __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * Name is not required, in i.e. a List
+ */
+ public String name() { int o = __offset(4); return o != 0 ? __string(o + bb_pos) : null; }
+ public ByteBuffer nameAsByteBuffer() { return __vector_as_bytebuffer(4, 1); }
+ public ByteBuffer nameInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 4, 1); }
+ /**
+ * Whether or not this field can contain nulls. Should be true in general.
+ */
+ public boolean nullable() { int o = __offset(6); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+ public byte typeType() { int o = __offset(8); return o != 0 ? bb.get(o + bb_pos) : 0; }
+ /**
+ * This is the type of the decoded value if the field is dictionary encoded.
+ */
+ public Table type(Table obj) { int o = __offset(10); return o != 0 ? __union(obj, o + bb_pos) : null; }
+ /**
+ * Present only if the field is dictionary encoded.
+ */
+ public org.apache.arrow.flatbuf.DictionaryEncoding dictionary() { return dictionary(new org.apache.arrow.flatbuf.DictionaryEncoding()); }
+ public org.apache.arrow.flatbuf.DictionaryEncoding dictionary(org.apache.arrow.flatbuf.DictionaryEncoding obj) { int o = __offset(12); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ /**
+ * children apply only to nested data types like Struct, List and Union. For
+ * primitive types children will have length 0.
+ */
+ public org.apache.arrow.flatbuf.Field children(int j) { return children(new org.apache.arrow.flatbuf.Field(), j); }
+ public org.apache.arrow.flatbuf.Field children(org.apache.arrow.flatbuf.Field obj, int j) { int o = __offset(14); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+ public int childrenLength() { int o = __offset(14); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.Field.Vector childrenVector() { return childrenVector(new org.apache.arrow.flatbuf.Field.Vector()); }
+ public org.apache.arrow.flatbuf.Field.Vector childrenVector(org.apache.arrow.flatbuf.Field.Vector obj) { int o = __offset(14); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+ /**
+ * User-defined metadata
+ */
+ public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); }
+ public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(16); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+ public int customMetadataLength() { int o = __offset(16); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); }
+ public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(16); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+
+ public static int createField(FlatBufferBuilder builder,
+ int nameOffset,
+ boolean nullable,
+ byte type_type,
+ int typeOffset,
+ int dictionaryOffset,
+ int childrenOffset,
+ int custom_metadataOffset) {
+ builder.startTable(7);
+ Field.addCustomMetadata(builder, custom_metadataOffset);
+ Field.addChildren(builder, childrenOffset);
+ Field.addDictionary(builder, dictionaryOffset);
+ Field.addType(builder, typeOffset);
+ Field.addName(builder, nameOffset);
+ Field.addTypeType(builder, type_type);
+ Field.addNullable(builder, nullable);
+ return Field.endField(builder);
+ }
+
+ public static void startField(FlatBufferBuilder builder) { builder.startTable(7); }
+ public static void addName(FlatBufferBuilder builder, int nameOffset) { builder.addOffset(0, nameOffset, 0); }
+ public static void addNullable(FlatBufferBuilder builder, boolean nullable) { builder.addBoolean(1, nullable, false); }
+ public static void addTypeType(FlatBufferBuilder builder, byte typeType) { builder.addByte(2, typeType, 0); }
+ public static void addType(FlatBufferBuilder builder, int typeOffset) { builder.addOffset(3, typeOffset, 0); }
+ public static void addDictionary(FlatBufferBuilder builder, int dictionaryOffset) { builder.addOffset(4, dictionaryOffset, 0); }
+ public static void addChildren(FlatBufferBuilder builder, int childrenOffset) { builder.addOffset(5, childrenOffset, 0); }
+ public static int createChildrenVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+ public static void startChildrenVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(6, customMetadataOffset, 0); }
+ public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+ public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static int endField(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Field get(int j) { return get(new Field(), j); }
+ public Field get(Field obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java
new file mode 100644
index 000000000..3ea9805f6
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * Data structures for describing a table row batch (a collection of
+ * equal-length Arrow arrays)
+ * Metadata about a field at some level of a nested type tree (but not
+ * its children).
+ *
+ * For example, a List<Int16> with values `[[1, 2, 3], null, [4], [5, 6], null]`
+ * would have {length: 5, null_count: 2} for its List node, and {length: 6,
+ * null_count: 0} for its Int16 node, as separate FieldNode structs
+ */
+public final class FieldNode extends Struct {
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public FieldNode __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * The number of value slots in the Arrow array at this level of a nested
+ * tree
+ */
+ public long length() { return bb.getLong(bb_pos + 0); }
+ /**
+ * The number of observed nulls. Fields with null_count == 0 may choose not
+ * to write their physical validity bitmap out as a materialized buffer,
+ * instead setting the length of the bitmap buffer to 0.
+ */
+ public long nullCount() { return bb.getLong(bb_pos + 8); }
+
+ public static int createFieldNode(FlatBufferBuilder builder, long length, long nullCount) {
+ builder.prep(8, 16);
+ builder.putLong(nullCount);
+ builder.putLong(length);
+ return builder.offset();
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public FieldNode get(int j) { return get(new FieldNode(), j); }
+ public FieldNode get(FieldNode obj, int j) { return obj.__assign(__element(j), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java
new file mode 100644
index 000000000..287b34e22
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class FixedSizeBinary extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static FixedSizeBinary getRootAsFixedSizeBinary(ByteBuffer _bb) { return getRootAsFixedSizeBinary(_bb, new FixedSizeBinary()); }
+ public static FixedSizeBinary getRootAsFixedSizeBinary(ByteBuffer _bb, FixedSizeBinary obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public FixedSizeBinary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * Number of bytes per value
+ */
+ public int byteWidth() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+
+ public static int createFixedSizeBinary(FlatBufferBuilder builder,
+ int byteWidth) {
+ builder.startTable(1);
+ FixedSizeBinary.addByteWidth(builder, byteWidth);
+ return FixedSizeBinary.endFixedSizeBinary(builder);
+ }
+
+ public static void startFixedSizeBinary(FlatBufferBuilder builder) { builder.startTable(1); }
+ public static void addByteWidth(FlatBufferBuilder builder, int byteWidth) { builder.addInt(0, byteWidth, 0); }
+ public static int endFixedSizeBinary(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public FixedSizeBinary get(int j) { return get(new FixedSizeBinary(), j); }
+ public FixedSizeBinary get(FixedSizeBinary obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java
new file mode 100644
index 000000000..d0d889238
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class FixedSizeList extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static FixedSizeList getRootAsFixedSizeList(ByteBuffer _bb) { return getRootAsFixedSizeList(_bb, new FixedSizeList()); }
+ public static FixedSizeList getRootAsFixedSizeList(ByteBuffer _bb, FixedSizeList obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public FixedSizeList __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * Number of list items per value
+ */
+ public int listSize() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+
+ public static int createFixedSizeList(FlatBufferBuilder builder,
+ int listSize) {
+ builder.startTable(1);
+ FixedSizeList.addListSize(builder, listSize);
+ return FixedSizeList.endFixedSizeList(builder);
+ }
+
+ public static void startFixedSizeList(FlatBufferBuilder builder) { builder.startTable(1); }
+ public static void addListSize(FlatBufferBuilder builder, int listSize) { builder.addInt(0, listSize, 0); }
+ public static int endFixedSizeList(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public FixedSizeList get(int j) { return get(new FixedSizeList(), j); }
+ public FixedSizeList get(FixedSizeList obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java
new file mode 100644
index 000000000..945fa627d
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class FloatingPoint extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static FloatingPoint getRootAsFloatingPoint(ByteBuffer _bb) { return getRootAsFloatingPoint(_bb, new FloatingPoint()); }
+ public static FloatingPoint getRootAsFloatingPoint(ByteBuffer _bb, FloatingPoint obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public FloatingPoint __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public short precision() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+
+ public static int createFloatingPoint(FlatBufferBuilder builder,
+ short precision) {
+ builder.startTable(1);
+ FloatingPoint.addPrecision(builder, precision);
+ return FloatingPoint.endFloatingPoint(builder);
+ }
+
+ public static void startFloatingPoint(FlatBufferBuilder builder) { builder.startTable(1); }
+ public static void addPrecision(FlatBufferBuilder builder, short precision) { builder.addShort(0, precision, 0); }
+ public static int endFloatingPoint(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public FloatingPoint get(int j) { return get(new FloatingPoint(), j); }
+ public FloatingPoint get(FloatingPoint obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java
new file mode 100644
index 000000000..86fd75e03
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * Arrow File metadata
+ *
+ */
+public final class Footer extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Footer getRootAsFooter(ByteBuffer _bb) { return getRootAsFooter(_bb, new Footer()); }
+ public static Footer getRootAsFooter(ByteBuffer _bb, Footer obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Footer __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public short version() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+ public org.apache.arrow.flatbuf.Schema schema() { return schema(new org.apache.arrow.flatbuf.Schema()); }
+ public org.apache.arrow.flatbuf.Schema schema(org.apache.arrow.flatbuf.Schema obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ public org.apache.arrow.flatbuf.Block dictionaries(int j) { return dictionaries(new org.apache.arrow.flatbuf.Block(), j); }
+ public org.apache.arrow.flatbuf.Block dictionaries(org.apache.arrow.flatbuf.Block obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o) + j * 24, bb) : null; }
+ public int dictionariesLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.Block.Vector dictionariesVector() { return dictionariesVector(new org.apache.arrow.flatbuf.Block.Vector()); }
+ public org.apache.arrow.flatbuf.Block.Vector dictionariesVector(org.apache.arrow.flatbuf.Block.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 24, bb) : null; }
+ public org.apache.arrow.flatbuf.Block recordBatches(int j) { return recordBatches(new org.apache.arrow.flatbuf.Block(), j); }
+ public org.apache.arrow.flatbuf.Block recordBatches(org.apache.arrow.flatbuf.Block obj, int j) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o) + j * 24, bb) : null; }
+ public int recordBatchesLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.Block.Vector recordBatchesVector() { return recordBatchesVector(new org.apache.arrow.flatbuf.Block.Vector()); }
+ public org.apache.arrow.flatbuf.Block.Vector recordBatchesVector(org.apache.arrow.flatbuf.Block.Vector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), 24, bb) : null; }
+ /**
+ * User-defined metadata
+ */
+ public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); }
+ public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(12); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+ public int customMetadataLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); }
+ public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(12); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+
+ public static int createFooter(FlatBufferBuilder builder,
+ short version,
+ int schemaOffset,
+ int dictionariesOffset,
+ int recordBatchesOffset,
+ int custom_metadataOffset) {
+ builder.startTable(5);
+ Footer.addCustomMetadata(builder, custom_metadataOffset);
+ Footer.addRecordBatches(builder, recordBatchesOffset);
+ Footer.addDictionaries(builder, dictionariesOffset);
+ Footer.addSchema(builder, schemaOffset);
+ Footer.addVersion(builder, version);
+ return Footer.endFooter(builder);
+ }
+
+ public static void startFooter(FlatBufferBuilder builder) { builder.startTable(5); }
+ public static void addVersion(FlatBufferBuilder builder, short version) { builder.addShort(0, version, 0); }
+ public static void addSchema(FlatBufferBuilder builder, int schemaOffset) { builder.addOffset(1, schemaOffset, 0); }
+ public static void addDictionaries(FlatBufferBuilder builder, int dictionariesOffset) { builder.addOffset(2, dictionariesOffset, 0); }
+ public static void startDictionariesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(24, numElems, 8); }
+ public static void addRecordBatches(FlatBufferBuilder builder, int recordBatchesOffset) { builder.addOffset(3, recordBatchesOffset, 0); }
+ public static void startRecordBatchesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(24, numElems, 8); }
+ public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(4, customMetadataOffset, 0); }
+ public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+ public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static int endFooter(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+ public static void finishFooterBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+ public static void finishSizePrefixedFooterBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Footer get(int j) { return get(new Footer(), j); }
+ public Footer get(Footer obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java
new file mode 100644
index 000000000..94cb96a05
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Int extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Int getRootAsInt(ByteBuffer _bb) { return getRootAsInt(_bb, new Int()); }
+ public static Int getRootAsInt(ByteBuffer _bb, Int obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Int __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public int bitWidth() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; }
+ public boolean isSigned() { int o = __offset(6); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+
+ public static int createInt(FlatBufferBuilder builder,
+ int bitWidth,
+ boolean is_signed) {
+ builder.startTable(2);
+ Int.addBitWidth(builder, bitWidth);
+ Int.addIsSigned(builder, is_signed);
+ return Int.endInt(builder);
+ }
+
+ public static void startInt(FlatBufferBuilder builder) { builder.startTable(2); }
+ public static void addBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.addInt(0, bitWidth, 0); }
+ public static void addIsSigned(FlatBufferBuilder builder, boolean isSigned) { builder.addBoolean(1, isSigned, false); }
+ public static int endInt(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Int get(int j) { return get(new Int(), j); }
+ public Int get(Int obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java
new file mode 100644
index 000000000..e690b0bad
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Interval extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Interval getRootAsInterval(ByteBuffer _bb) { return getRootAsInterval(_bb, new Interval()); }
+ public static Interval getRootAsInterval(ByteBuffer _bb, Interval obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Interval __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+
+ public static int createInterval(FlatBufferBuilder builder,
+ short unit) {
+ builder.startTable(1);
+ Interval.addUnit(builder, unit);
+ return Interval.endInterval(builder);
+ }
+
+ public static void startInterval(FlatBufferBuilder builder) { builder.startTable(1); }
+ public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 0); }
+ public static int endInterval(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Interval get(int j) { return get(new Interval(), j); }
+ public Interval get(Interval obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java
new file mode 100644
index 000000000..2b1e8248a
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class IntervalUnit {
+ private IntervalUnit() { }
+ public static final short YEAR_MONTH = 0;
+ public static final short DAY_TIME = 1;
+ public static final short MONTH_DAY_NANO = 2;
+
+ public static final String[] names = { "YEAR_MONTH", "DAY_TIME", "MONTH_DAY_NANO", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java
new file mode 100644
index 000000000..0c6e9f66e
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * user defined key value pairs to add custom metadata to arrow
+ * key namespacing is the responsibility of the user
+ */
+public final class KeyValue extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static KeyValue getRootAsKeyValue(ByteBuffer _bb) { return getRootAsKeyValue(_bb, new KeyValue()); }
+ public static KeyValue getRootAsKeyValue(ByteBuffer _bb, KeyValue obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public KeyValue __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public String key() { int o = __offset(4); return o != 0 ? __string(o + bb_pos) : null; }
+ public ByteBuffer keyAsByteBuffer() { return __vector_as_bytebuffer(4, 1); }
+ public ByteBuffer keyInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 4, 1); }
+ public String value() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
+ public ByteBuffer valueAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
+ public ByteBuffer valueInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); }
+
+ public static int createKeyValue(FlatBufferBuilder builder,
+ int keyOffset,
+ int valueOffset) {
+ builder.startTable(2);
+ KeyValue.addValue(builder, valueOffset);
+ KeyValue.addKey(builder, keyOffset);
+ return KeyValue.endKeyValue(builder);
+ }
+
+ public static void startKeyValue(FlatBufferBuilder builder) { builder.startTable(2); }
+ public static void addKey(FlatBufferBuilder builder, int keyOffset) { builder.addOffset(0, keyOffset, 0); }
+ public static void addValue(FlatBufferBuilder builder, int valueOffset) { builder.addOffset(1, valueOffset, 0); }
+ public static int endKeyValue(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public KeyValue get(int j) { return get(new KeyValue(), j); }
+ public KeyValue get(KeyValue obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java
new file mode 100644
index 000000000..b7377bbe9
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Same as Binary, but with 64-bit offsets, allowing to represent
+ * extremely large data values.
+ */
+public final class LargeBinary extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static LargeBinary getRootAsLargeBinary(ByteBuffer _bb) { return getRootAsLargeBinary(_bb, new LargeBinary()); }
+ public static LargeBinary getRootAsLargeBinary(ByteBuffer _bb, LargeBinary obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public LargeBinary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+ public static void startLargeBinary(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endLargeBinary(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public LargeBinary get(int j) { return get(new LargeBinary(), j); }
+ public LargeBinary get(LargeBinary obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java
new file mode 100644
index 000000000..32cc0034c
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Same as List, but with 64-bit offsets, allowing to represent
+ * extremely large data values.
+ */
+public final class LargeList extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static LargeList getRootAsLargeList(ByteBuffer _bb) { return getRootAsLargeList(_bb, new LargeList()); }
+ public static LargeList getRootAsLargeList(ByteBuffer _bb, LargeList obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public LargeList __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+ public static void startLargeList(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endLargeList(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public LargeList get(int j) { return get(new LargeList(), j); }
+ public LargeList get(LargeList obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java
new file mode 100644
index 000000000..7e7a20117
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Same as Utf8, but with 64-bit offsets, allowing to represent
+ * extremely large data values.
+ */
+public final class LargeUtf8 extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static LargeUtf8 getRootAsLargeUtf8(ByteBuffer _bb) { return getRootAsLargeUtf8(_bb, new LargeUtf8()); }
+ public static LargeUtf8 getRootAsLargeUtf8(ByteBuffer _bb, LargeUtf8 obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public LargeUtf8 __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+ public static void startLargeUtf8(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endLargeUtf8(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public LargeUtf8 get(int j) { return get(new LargeUtf8(), j); }
+ public LargeUtf8 get(LargeUtf8 obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/List.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/List.java
new file mode 100644
index 000000000..4493f9c5b
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/List.java
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class List extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static List getRootAsList(ByteBuffer _bb) { return getRootAsList(_bb, new List()); }
+ public static List getRootAsList(ByteBuffer _bb, List obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public List __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+ public static void startList(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endList(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public List get(int j) { return get(new List(), j); }
+ public List get(List obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java
new file mode 100644
index 000000000..704426e92
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * A Map is a logical nested type that is represented as
+ *
+ * List<entries: Struct<key: K, value: V>>
+ *
+ * In this layout, the keys and values are each respectively contiguous. We do
+ * not constrain the key and value types, so the application is responsible
+ * for ensuring that the keys are hashable and unique. Whether the keys are sorted
+ * may be set in the metadata for this field.
+ *
+ * In a field with Map type, the field has a child Struct field, which then
+ * has two children: key type and the second the value type. The names of the
+ * child fields may be respectively "entries", "key", and "value", but this is
+ * not enforced.
+ *
+ * Map
+ * ```text
+ * - child[0] entries: Struct
+ * - child[0] key: K
+ * - child[1] value: V
+ * ```
+ * Neither the "entries" field nor the "key" field may be nullable.
+ *
+ * The metadata is structured so that Arrow systems without special handling
+ * for Map can make Map an alias for List. The "layout" attribute for the Map
+ * field must have the same contents as a List.
+ */
+public final class Map extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Map getRootAsMap(ByteBuffer _bb) { return getRootAsMap(_bb, new Map()); }
+ public static Map getRootAsMap(ByteBuffer _bb, Map obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Map __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * Set to true if the keys within each value are sorted
+ */
+ public boolean keysSorted() { int o = __offset(4); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+
+ public static int createMap(FlatBufferBuilder builder,
+ boolean keysSorted) {
+ builder.startTable(1);
+ Map.addKeysSorted(builder, keysSorted);
+ return Map.endMap(builder);
+ }
+
+ public static void startMap(FlatBufferBuilder builder) { builder.startTable(1); }
+ public static void addKeysSorted(FlatBufferBuilder builder, boolean keysSorted) { builder.addBoolean(0, keysSorted, false); }
+ public static int endMap(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Map get(int j) { return get(new Map(), j); }
+ public Map get(Map obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java
new file mode 100644
index 000000000..c7738ad95
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Message extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Message getRootAsMessage(ByteBuffer _bb) { return getRootAsMessage(_bb, new Message()); }
+ public static Message getRootAsMessage(ByteBuffer _bb, Message obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Message __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public short version() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+ public byte headerType() { int o = __offset(6); return o != 0 ? bb.get(o + bb_pos) : 0; }
+ public Table header(Table obj) { int o = __offset(8); return o != 0 ? __union(obj, o + bb_pos) : null; }
+ public long bodyLength() { int o = __offset(10); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+ public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); }
+ public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(12); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+ public int customMetadataLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); }
+ public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(12); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+
+ public static int createMessage(FlatBufferBuilder builder,
+ short version,
+ byte header_type,
+ int headerOffset,
+ long bodyLength,
+ int custom_metadataOffset) {
+ builder.startTable(5);
+ Message.addBodyLength(builder, bodyLength);
+ Message.addCustomMetadata(builder, custom_metadataOffset);
+ Message.addHeader(builder, headerOffset);
+ Message.addVersion(builder, version);
+ Message.addHeaderType(builder, header_type);
+ return Message.endMessage(builder);
+ }
+
+ public static void startMessage(FlatBufferBuilder builder) { builder.startTable(5); }
+ public static void addVersion(FlatBufferBuilder builder, short version) { builder.addShort(0, version, 0); }
+ public static void addHeaderType(FlatBufferBuilder builder, byte headerType) { builder.addByte(1, headerType, 0); }
+ public static void addHeader(FlatBufferBuilder builder, int headerOffset) { builder.addOffset(2, headerOffset, 0); }
+ public static void addBodyLength(FlatBufferBuilder builder, long bodyLength) { builder.addLong(3, bodyLength, 0L); }
+ public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(4, customMetadataOffset, 0); }
+ public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+ public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static int endMessage(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+ public static void finishMessageBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+ public static void finishSizePrefixedMessageBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Message get(int j) { return get(new Message(), j); }
+ public Message get(Message obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java
new file mode 100644
index 000000000..179b6ba0f
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * ----------------------------------------------------------------------
+ * The root Message type
+ * This union enables us to easily send different message types without
+ * redundant storage, and in the future we can easily add new message types.
+ *
+ * Arrow implementations do not need to implement all of the message types,
+ * which may include experimental metadata types. For maximum compatibility,
+ * it is best to send data using RecordBatch
+ */
+public final class MessageHeader {
+ private MessageHeader() { }
+ public static final byte NONE = 0;
+ public static final byte Schema = 1;
+ public static final byte DictionaryBatch = 2;
+ public static final byte RecordBatch = 3;
+ public static final byte Tensor = 4;
+ public static final byte SparseTensor = 5;
+
+ public static final String[] names = { "NONE", "Schema", "DictionaryBatch", "RecordBatch", "Tensor", "SparseTensor", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java
new file mode 100644
index 000000000..8ce9d84fc
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class MetadataVersion {
+ private MetadataVersion() { }
+ /**
+ * 0.1.0 (October 2016).
+ */
+ public static final short V1 = 0;
+ /**
+ * 0.2.0 (February 2017). Non-backwards compatible with V1.
+ */
+ public static final short V2 = 1;
+ /**
+ * 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
+ */
+ public static final short V3 = 2;
+ /**
+ * >= 0.8.0 (December 2017). Non-backwards compatible with V3.
+ */
+ public static final short V4 = 3;
+ /**
+ * >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
+ * metadata and IPC messages). Implementations are recommended to provide a
+ * V4 compatibility mode with V5 format changes disabled.
+ *
+ * Incompatible changes between V4 and V5:
+ * - Union buffer layout has changed. In V5, Unions don't have a validity
+ * bitmap buffer.
+ */
+ public static final short V5 = 4;
+
+ public static final String[] names = { "V1", "V2", "V3", "V4", "V5", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java
new file mode 100644
index 000000000..b7a30f2e8
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * These are stored in the flatbuffer in the Type union below
+ */
+public final class Null extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Null getRootAsNull(ByteBuffer _bb) { return getRootAsNull(_bb, new Null()); }
+ public static Null getRootAsNull(ByteBuffer _bb, Null obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Null __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+ public static void startNull(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endNull(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Null get(int j) { return get(new Null(), j); }
+ public Null get(Null obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java
new file mode 100644
index 000000000..e2c42237a
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class Precision {
+ private Precision() { }
+ public static final short HALF = 0;
+ public static final short SINGLE = 1;
+ public static final short DOUBLE = 2;
+
+ public static final String[] names = { "HALF", "SINGLE", "DOUBLE", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
new file mode 100644
index 000000000..eb814e07d
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * A data header describing the shared memory layout of a "record" or "row"
+ * batch. Some systems call this a "row batch" internally and others a "record
+ * batch".
+ */
+public final class RecordBatch extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static RecordBatch getRootAsRecordBatch(ByteBuffer _bb) { return getRootAsRecordBatch(_bb, new RecordBatch()); }
+ public static RecordBatch getRootAsRecordBatch(ByteBuffer _bb, RecordBatch obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public RecordBatch __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * number of records / rows. The arrays in the batch should all have this
+ * length
+ */
+ public long length() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+ /**
+ * Nodes correspond to the pre-ordered flattened logical schema
+ */
+ public org.apache.arrow.flatbuf.FieldNode nodes(int j) { return nodes(new org.apache.arrow.flatbuf.FieldNode(), j); }
+ public org.apache.arrow.flatbuf.FieldNode nodes(org.apache.arrow.flatbuf.FieldNode obj, int j) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; }
+ public int nodesLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.FieldNode.Vector nodesVector() { return nodesVector(new org.apache.arrow.flatbuf.FieldNode.Vector()); }
+ public org.apache.arrow.flatbuf.FieldNode.Vector nodesVector(org.apache.arrow.flatbuf.FieldNode.Vector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; }
+ /**
+ * Buffers correspond to the pre-ordered flattened buffer tree
+ *
+ * The number of buffers appended to this list depends on the schema. For
+ * example, most primitive arrays will have 2 buffers, 1 for the validity
+ * bitmap and 1 for the values. For struct arrays, there will only be a
+ * single buffer for the validity (nulls) bitmap
+ */
+ public org.apache.arrow.flatbuf.Buffer buffers(int j) { return buffers(new org.apache.arrow.flatbuf.Buffer(), j); }
+ public org.apache.arrow.flatbuf.Buffer buffers(org.apache.arrow.flatbuf.Buffer obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; }
+ public int buffersLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.Buffer.Vector buffersVector() { return buffersVector(new org.apache.arrow.flatbuf.Buffer.Vector()); }
+ public org.apache.arrow.flatbuf.Buffer.Vector buffersVector(org.apache.arrow.flatbuf.Buffer.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; }
+ /**
+ * Optional compression of the message body
+ */
+ public org.apache.arrow.flatbuf.BodyCompression compression() { return compression(new org.apache.arrow.flatbuf.BodyCompression()); }
+ public org.apache.arrow.flatbuf.BodyCompression compression(org.apache.arrow.flatbuf.BodyCompression obj) { int o = __offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+
+ public static int createRecordBatch(FlatBufferBuilder builder,
+ long length,
+ int nodesOffset,
+ int buffersOffset,
+ int compressionOffset) {
+ builder.startTable(4);
+ RecordBatch.addLength(builder, length);
+ RecordBatch.addCompression(builder, compressionOffset);
+ RecordBatch.addBuffers(builder, buffersOffset);
+ RecordBatch.addNodes(builder, nodesOffset);
+ return RecordBatch.endRecordBatch(builder);
+ }
+
+ public static void startRecordBatch(FlatBufferBuilder builder) { builder.startTable(4); }
+ public static void addLength(FlatBufferBuilder builder, long length) { builder.addLong(0, length, 0L); }
+ public static void addNodes(FlatBufferBuilder builder, int nodesOffset) { builder.addOffset(1, nodesOffset, 0); }
+ public static void startNodesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); }
+ public static void addBuffers(FlatBufferBuilder builder, int buffersOffset) { builder.addOffset(2, buffersOffset, 0); }
+ public static void startBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); }
+ public static void addCompression(FlatBufferBuilder builder, int compressionOffset) { builder.addOffset(3, compressionOffset, 0); }
+ public static int endRecordBatch(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public RecordBatch get(int j) { return get(new RecordBatch(), j); }
+ public RecordBatch get(RecordBatch obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java
new file mode 100644
index 000000000..69c025254
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * A Schema describes the columns in a row batch
+ */
+public final class Schema extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Schema getRootAsSchema(ByteBuffer _bb) { return getRootAsSchema(_bb, new Schema()); }
+ public static Schema getRootAsSchema(ByteBuffer _bb, Schema obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Schema __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * endianness of the buffer
+ * it is Little Endian by default
+ * if endianness doesn't match the underlying system then the vectors need to be converted
+ */
+ public short endianness() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+ public org.apache.arrow.flatbuf.Field fields(int j) { return fields(new org.apache.arrow.flatbuf.Field(), j); }
+ public org.apache.arrow.flatbuf.Field fields(org.apache.arrow.flatbuf.Field obj, int j) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+ public int fieldsLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.Field.Vector fieldsVector() { return fieldsVector(new org.apache.arrow.flatbuf.Field.Vector()); }
+ public org.apache.arrow.flatbuf.Field.Vector fieldsVector(org.apache.arrow.flatbuf.Field.Vector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+ public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); }
+ public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+ public int customMetadataLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); }
+ public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+ /**
+ * Features used in the stream/file.
+ */
+ public long features(int j) { int o = __offset(10); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; }
+ public int featuresLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; }
+ public LongVector featuresVector() { return featuresVector(new LongVector()); }
+ public LongVector featuresVector(LongVector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+ public ByteBuffer featuresAsByteBuffer() { return __vector_as_bytebuffer(10, 8); }
+ public ByteBuffer featuresInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 10, 8); }
+
+ public static int createSchema(FlatBufferBuilder builder,
+ short endianness,
+ int fieldsOffset,
+ int custom_metadataOffset,
+ int featuresOffset) {
+ builder.startTable(4);
+ Schema.addFeatures(builder, featuresOffset);
+ Schema.addCustomMetadata(builder, custom_metadataOffset);
+ Schema.addFields(builder, fieldsOffset);
+ Schema.addEndianness(builder, endianness);
+ return Schema.endSchema(builder);
+ }
+
+ public static void startSchema(FlatBufferBuilder builder) { builder.startTable(4); }
+ public static void addEndianness(FlatBufferBuilder builder, short endianness) { builder.addShort(0, endianness, 0); }
+ public static void addFields(FlatBufferBuilder builder, int fieldsOffset) { builder.addOffset(1, fieldsOffset, 0); }
+ public static int createFieldsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+ public static void startFieldsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(2, customMetadataOffset, 0); }
+ public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+ public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static void addFeatures(FlatBufferBuilder builder, int featuresOffset) { builder.addOffset(3, featuresOffset, 0); }
+ public static int createFeaturesVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); }
+ public static void startFeaturesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); }
+ public static int endSchema(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+ public static void finishSchemaBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+ public static void finishSizePrefixedSchemaBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Schema get(int j) { return get(new Schema(), j); }
+ public Schema get(Schema obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java
new file mode 100644
index 000000000..2ad314f2e
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class SparseMatrixCompressedAxis {
+ private SparseMatrixCompressedAxis() { }
+ public static final short Row = 0;
+ public static final short Column = 1;
+
+ public static final String[] names = { "Row", "Column", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java
new file mode 100644
index 000000000..9516a6ec1
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java
@@ -0,0 +1,114 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Compressed Sparse format, that is matrix-specific.
+ */
+public final class SparseMatrixIndexCSX extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static SparseMatrixIndexCSX getRootAsSparseMatrixIndexCSX(ByteBuffer _bb) { return getRootAsSparseMatrixIndexCSX(_bb, new SparseMatrixIndexCSX()); }
+ public static SparseMatrixIndexCSX getRootAsSparseMatrixIndexCSX(ByteBuffer _bb, SparseMatrixIndexCSX obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public SparseMatrixIndexCSX __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * Which axis, row or column, is compressed
+ */
+ public short compressedAxis() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+ /**
+ * The type of values in indptrBuffer
+ */
+ public org.apache.arrow.flatbuf.Int indptrType() { return indptrType(new org.apache.arrow.flatbuf.Int()); }
+ public org.apache.arrow.flatbuf.Int indptrType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ /**
+ * indptrBuffer stores the location and size of indptr array that
+ * represents the range of the rows.
+ * The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
+ * The length of this array is 1 + (the number of rows), and the type
+ * of index value is long.
+ *
+ * For example, let X be the following 6x4 matrix:
+ * ```text
+ * X := [[0, 1, 2, 0],
+ * [0, 0, 3, 0],
+ * [0, 4, 0, 5],
+ * [0, 0, 0, 0],
+ * [6, 0, 7, 8],
+ * [0, 9, 0, 0]].
+ * ```
+ * The array of non-zero values in X is:
+ * ```text
+ * values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+ * ```
+ * And the indptr of X is:
+ * ```text
+ * indptr(X) = [0, 2, 3, 5, 5, 8, 10].
+ * ```
+ */
+ public org.apache.arrow.flatbuf.Buffer indptrBuffer() { return indptrBuffer(new org.apache.arrow.flatbuf.Buffer()); }
+ public org.apache.arrow.flatbuf.Buffer indptrBuffer(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(8); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+ /**
+ * The type of values in indicesBuffer
+ */
+ public org.apache.arrow.flatbuf.Int indicesType() { return indicesType(new org.apache.arrow.flatbuf.Int()); }
+ public org.apache.arrow.flatbuf.Int indicesType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ /**
+ * indicesBuffer stores the location and size of the array that
+ * contains the column indices of the corresponding non-zero values.
+ * The type of index value is long.
+ *
+ * For example, the indices of the above X is:
+ * ```text
+ * indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+ * ```
+ * Note that the indices are sorted in lexicographical order for each row.
+ */
+ public org.apache.arrow.flatbuf.Buffer indicesBuffer() { return indicesBuffer(new org.apache.arrow.flatbuf.Buffer()); }
+ public org.apache.arrow.flatbuf.Buffer indicesBuffer(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(12); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+
+ public static void startSparseMatrixIndexCSX(FlatBufferBuilder builder) { builder.startTable(5); }
+ public static void addCompressedAxis(FlatBufferBuilder builder, short compressedAxis) { builder.addShort(0, compressedAxis, 0); }
+ public static void addIndptrType(FlatBufferBuilder builder, int indptrTypeOffset) { builder.addOffset(1, indptrTypeOffset, 0); }
+ public static void addIndptrBuffer(FlatBufferBuilder builder, int indptrBufferOffset) { builder.addStruct(2, indptrBufferOffset, 0); }
+ public static void addIndicesType(FlatBufferBuilder builder, int indicesTypeOffset) { builder.addOffset(3, indicesTypeOffset, 0); }
+ public static void addIndicesBuffer(FlatBufferBuilder builder, int indicesBufferOffset) { builder.addStruct(4, indicesBufferOffset, 0); }
+ public static int endSparseMatrixIndexCSX(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ builder.required(o, 6); // indptrType
+ builder.required(o, 8); // indptrBuffer
+ builder.required(o, 10); // indicesType
+ builder.required(o, 12); // indicesBuffer
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public SparseMatrixIndexCSX get(int j) { return get(new SparseMatrixIndexCSX(), j); }
+ public SparseMatrixIndexCSX get(SparseMatrixIndexCSX obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java
new file mode 100644
index 000000000..9b4cdf6e8
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class SparseTensor extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static SparseTensor getRootAsSparseTensor(ByteBuffer _bb) { return getRootAsSparseTensor(_bb, new SparseTensor()); }
+ public static SparseTensor getRootAsSparseTensor(ByteBuffer _bb, SparseTensor obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public SparseTensor __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public byte typeType() { int o = __offset(4); return o != 0 ? bb.get(o + bb_pos) : 0; }
+ /**
+ * The type of data contained in a value cell.
+ * Currently only fixed-width value types are supported,
+ * no strings or nested types.
+ */
+ public Table type(Table obj) { int o = __offset(6); return o != 0 ? __union(obj, o + bb_pos) : null; }
+ /**
+ * The dimensions of the tensor, optionally named.
+ */
+ public org.apache.arrow.flatbuf.TensorDim shape(int j) { return shape(new org.apache.arrow.flatbuf.TensorDim(), j); }
+ public org.apache.arrow.flatbuf.TensorDim shape(org.apache.arrow.flatbuf.TensorDim obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+ public int shapeLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector() { return shapeVector(new org.apache.arrow.flatbuf.TensorDim.Vector()); }
+ public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector(org.apache.arrow.flatbuf.TensorDim.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+ /**
+ * The number of non-zero values in a sparse tensor.
+ */
+ public long nonZeroLength() { int o = __offset(10); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+ public byte sparseIndexType() { int o = __offset(12); return o != 0 ? bb.get(o + bb_pos) : 0; }
+ /**
+ * Sparse tensor index
+ */
+ public Table sparseIndex(Table obj) { int o = __offset(14); return o != 0 ? __union(obj, o + bb_pos) : null; }
+ /**
+ * The location and size of the tensor's data
+ */
+ public org.apache.arrow.flatbuf.Buffer data() { return data(new org.apache.arrow.flatbuf.Buffer()); }
+ public org.apache.arrow.flatbuf.Buffer data(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(16); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+
+ public static void startSparseTensor(FlatBufferBuilder builder) { builder.startTable(7); }
+ public static void addTypeType(FlatBufferBuilder builder, byte typeType) { builder.addByte(0, typeType, 0); }
+ public static void addType(FlatBufferBuilder builder, int typeOffset) { builder.addOffset(1, typeOffset, 0); }
+ public static void addShape(FlatBufferBuilder builder, int shapeOffset) { builder.addOffset(2, shapeOffset, 0); }
+ public static int createShapeVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+ public static void startShapeVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static void addNonZeroLength(FlatBufferBuilder builder, long nonZeroLength) { builder.addLong(3, nonZeroLength, 0L); }
+ public static void addSparseIndexType(FlatBufferBuilder builder, byte sparseIndexType) { builder.addByte(4, sparseIndexType, 0); }
+ public static void addSparseIndex(FlatBufferBuilder builder, int sparseIndexOffset) { builder.addOffset(5, sparseIndexOffset, 0); }
+ public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addStruct(6, dataOffset, 0); }
+ public static int endSparseTensor(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ builder.required(o, 6); // type
+ builder.required(o, 8); // shape
+ builder.required(o, 14); // sparseIndex
+ builder.required(o, 16); // data
+ return o;
+ }
+ public static void finishSparseTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+ public static void finishSizePrefixedSparseTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public SparseTensor get(int j) { return get(new SparseTensor(), j); }
+ public SparseTensor get(SparseTensor obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java
new file mode 100644
index 000000000..5b9444abc
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class SparseTensorIndex {
+ private SparseTensorIndex() { }
+ public static final byte NONE = 0;
+ public static final byte SparseTensorIndexCOO = 1;
+ public static final byte SparseMatrixIndexCSX = 2;
+ public static final byte SparseTensorIndexCSF = 3;
+
+ public static final String[] names = { "NONE", "SparseTensorIndexCOO", "SparseMatrixIndexCSX", "SparseTensorIndexCSF", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java
new file mode 100644
index 000000000..a84238d66
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * EXPERIMENTAL: Data structures for sparse tensors
+ * Coordinate (COO) format of sparse tensor index.
+ *
+ * COO's index list are represented as a NxM matrix,
+ * where N is the number of non-zero values,
+ * and M is the number of dimensions of a sparse tensor.
+ *
+ * indicesBuffer stores the location and size of the data of this indices
+ * matrix. The value type and the stride of the indices matrix is
+ * specified in indicesType and indicesStrides fields.
+ *
+ * For example, let X be a 2x3x4x5 tensor, and it has the following
+ * 6 non-zero values:
+ * ```text
+ * X[0, 1, 2, 0] := 1
+ * X[1, 1, 2, 3] := 2
+ * X[0, 2, 1, 0] := 3
+ * X[0, 1, 3, 0] := 4
+ * X[0, 1, 2, 1] := 5
+ * X[1, 2, 0, 4] := 6
+ * ```
+ * In COO format, the index matrix of X is the following 4x6 matrix:
+ * ```text
+ * [[0, 0, 0, 0, 1, 1],
+ * [1, 1, 1, 2, 1, 2],
+ * [2, 2, 3, 1, 2, 0],
+ * [0, 1, 0, 0, 3, 4]]
+ * ```
+ * When isCanonical is true, the indices is sorted in lexicographical order
+ * (row-major order), and it does not have duplicated entries. Otherwise,
+ * the indices may not be sorted, or may have duplicated entries.
+ */
+public final class SparseTensorIndexCOO extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static SparseTensorIndexCOO getRootAsSparseTensorIndexCOO(ByteBuffer _bb) { return getRootAsSparseTensorIndexCOO(_bb, new SparseTensorIndexCOO()); }
+ public static SparseTensorIndexCOO getRootAsSparseTensorIndexCOO(ByteBuffer _bb, SparseTensorIndexCOO obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public SparseTensorIndexCOO __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * The type of values in indicesBuffer
+ */
+ public org.apache.arrow.flatbuf.Int indicesType() { return indicesType(new org.apache.arrow.flatbuf.Int()); }
+ public org.apache.arrow.flatbuf.Int indicesType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(4); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ /**
+ * Non-negative byte offsets to advance one value cell along each dimension
+ * If omitted, default to row-major order (C-like).
+ */
+ public long indicesStrides(int j) { int o = __offset(6); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; }
+ public int indicesStridesLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+ public LongVector indicesStridesVector() { return indicesStridesVector(new LongVector()); }
+ public LongVector indicesStridesVector(LongVector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+ public ByteBuffer indicesStridesAsByteBuffer() { return __vector_as_bytebuffer(6, 8); }
+ public ByteBuffer indicesStridesInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 8); }
+ /**
+ * The location and size of the indices matrix's data
+ */
+ public org.apache.arrow.flatbuf.Buffer indicesBuffer() { return indicesBuffer(new org.apache.arrow.flatbuf.Buffer()); }
+ public org.apache.arrow.flatbuf.Buffer indicesBuffer(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(8); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+ /**
+ * This flag is true if and only if the indices matrix is sorted in
+ * row-major order, and does not have duplicated entries.
+ * This sort order is the same as of Tensorflow's SparseTensor,
+ * but it is inverse order of SciPy's canonical coo_matrix
+ * (SciPy employs column-major order for its coo_matrix).
+ */
+ public boolean isCanonical() { int o = __offset(10); return o != 0 ? 0!=bb.get(o + bb_pos) : false; }
+
+ public static void startSparseTensorIndexCOO(FlatBufferBuilder builder) { builder.startTable(4); }
+ public static void addIndicesType(FlatBufferBuilder builder, int indicesTypeOffset) { builder.addOffset(0, indicesTypeOffset, 0); }
+ public static void addIndicesStrides(FlatBufferBuilder builder, int indicesStridesOffset) { builder.addOffset(1, indicesStridesOffset, 0); }
+ public static int createIndicesStridesVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); }
+ public static void startIndicesStridesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); }
+ public static void addIndicesBuffer(FlatBufferBuilder builder, int indicesBufferOffset) { builder.addStruct(2, indicesBufferOffset, 0); }
+ public static void addIsCanonical(FlatBufferBuilder builder, boolean isCanonical) { builder.addBoolean(3, isCanonical, false); }
+ public static int endSparseTensorIndexCOO(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ builder.required(o, 4); // indicesType
+ builder.required(o, 8); // indicesBuffer
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public SparseTensorIndexCOO get(int j) { return get(new SparseTensorIndexCOO(), j); }
+ public SparseTensorIndexCOO get(SparseTensorIndexCOO obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java
new file mode 100644
index 000000000..abc4662be
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java
@@ -0,0 +1,173 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Compressed Sparse Fiber (CSF) sparse tensor index.
+ */
+public final class SparseTensorIndexCSF extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static SparseTensorIndexCSF getRootAsSparseTensorIndexCSF(ByteBuffer _bb) { return getRootAsSparseTensorIndexCSF(_bb, new SparseTensorIndexCSF()); }
+ public static SparseTensorIndexCSF getRootAsSparseTensorIndexCSF(ByteBuffer _bb, SparseTensorIndexCSF obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public SparseTensorIndexCSF __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * CSF is a generalization of compressed sparse row (CSR) index.
+ * See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
+ *
+ * CSF index recursively compresses each dimension of a tensor into a set
+ * of prefix trees. Each path from a root to leaf forms one tensor
+ * non-zero index. CSF is implemented with two arrays of buffers and one
+ * arrays of integers.
+ *
+ * For example, let X be a 2x3x4x5 tensor and let it have the following
+ * 8 non-zero values:
+ * ```text
+ * X[0, 0, 0, 1] := 1
+ * X[0, 0, 0, 2] := 2
+ * X[0, 1, 0, 0] := 3
+ * X[0, 1, 0, 2] := 4
+ * X[0, 1, 1, 0] := 5
+ * X[1, 1, 1, 0] := 6
+ * X[1, 1, 1, 1] := 7
+ * X[1, 1, 1, 2] := 8
+ * ```
+ * As a prefix tree this would be represented as:
+ * ```text
+ * 0 1
+ * / \ |
+ * 0 1 1
+ * / / \ |
+ * 0 0 1 1
+ * /| /| | /| |
+ * 1 2 0 2 0 0 1 2
+ * ```
+ * The type of values in indptrBuffers
+ */
+ public org.apache.arrow.flatbuf.Int indptrType() { return indptrType(new org.apache.arrow.flatbuf.Int()); }
+ public org.apache.arrow.flatbuf.Int indptrType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(4); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ /**
+ * indptrBuffers stores the sparsity structure.
+ * Each two consecutive dimensions in a tensor correspond to a buffer in
+ * indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
+ * and `indptrBuffers[dim][i + 1]` signify a range of nodes in
+ * `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
+ *
+ * For example, the indptrBuffers for the above X is:
+ * ```text
+ * indptrBuffer(X) = [
+ * [0, 2, 3],
+ * [0, 1, 3, 4],
+ * [0, 2, 4, 5, 8]
+ * ].
+ * ```
+ */
+ public org.apache.arrow.flatbuf.Buffer indptrBuffers(int j) { return indptrBuffers(new org.apache.arrow.flatbuf.Buffer(), j); }
+ public org.apache.arrow.flatbuf.Buffer indptrBuffers(org.apache.arrow.flatbuf.Buffer obj, int j) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; }
+ public int indptrBuffersLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.Buffer.Vector indptrBuffersVector() { return indptrBuffersVector(new org.apache.arrow.flatbuf.Buffer.Vector()); }
+ public org.apache.arrow.flatbuf.Buffer.Vector indptrBuffersVector(org.apache.arrow.flatbuf.Buffer.Vector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; }
+ /**
+ * The type of values in indicesBuffers
+ */
+ public org.apache.arrow.flatbuf.Int indicesType() { return indicesType(new org.apache.arrow.flatbuf.Int()); }
+ public org.apache.arrow.flatbuf.Int indicesType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ /**
+ * indicesBuffers stores values of nodes.
+ * Each tensor dimension corresponds to a buffer in indicesBuffers.
+ * For example, the indicesBuffers for the above X is:
+ * ```text
+ * indicesBuffer(X) = [
+ * [0, 1],
+ * [0, 1, 1],
+ * [0, 0, 1, 1],
+ * [1, 2, 0, 2, 0, 0, 1, 2]
+ * ].
+ * ```
+ */
+ public org.apache.arrow.flatbuf.Buffer indicesBuffers(int j) { return indicesBuffers(new org.apache.arrow.flatbuf.Buffer(), j); }
+ public org.apache.arrow.flatbuf.Buffer indicesBuffers(org.apache.arrow.flatbuf.Buffer obj, int j) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; }
+ public int indicesBuffersLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.Buffer.Vector indicesBuffersVector() { return indicesBuffersVector(new org.apache.arrow.flatbuf.Buffer.Vector()); }
+ public org.apache.arrow.flatbuf.Buffer.Vector indicesBuffersVector(org.apache.arrow.flatbuf.Buffer.Vector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; }
+ /**
+ * axisOrder stores the sequence in which dimensions were traversed to
+ * produce the prefix tree.
+ * For example, the axisOrder for the above X is:
+ * ```text
+ * axisOrder(X) = [0, 1, 2, 3].
+ * ```
+ */
+ public int axisOrder(int j) { int o = __offset(12); return o != 0 ? bb.getInt(__vector(o) + j * 4) : 0; }
+ public int axisOrderLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; }
+ public IntVector axisOrderVector() { return axisOrderVector(new IntVector()); }
+ public IntVector axisOrderVector(IntVector obj) { int o = __offset(12); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+ public ByteBuffer axisOrderAsByteBuffer() { return __vector_as_bytebuffer(12, 4); }
+ public ByteBuffer axisOrderInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 12, 4); }
+
+ public static int createSparseTensorIndexCSF(FlatBufferBuilder builder,
+ int indptrTypeOffset,
+ int indptrBuffersOffset,
+ int indicesTypeOffset,
+ int indicesBuffersOffset,
+ int axisOrderOffset) {
+ builder.startTable(5);
+ SparseTensorIndexCSF.addAxisOrder(builder, axisOrderOffset);
+ SparseTensorIndexCSF.addIndicesBuffers(builder, indicesBuffersOffset);
+ SparseTensorIndexCSF.addIndicesType(builder, indicesTypeOffset);
+ SparseTensorIndexCSF.addIndptrBuffers(builder, indptrBuffersOffset);
+ SparseTensorIndexCSF.addIndptrType(builder, indptrTypeOffset);
+ return SparseTensorIndexCSF.endSparseTensorIndexCSF(builder);
+ }
+
+ public static void startSparseTensorIndexCSF(FlatBufferBuilder builder) { builder.startTable(5); }
+ public static void addIndptrType(FlatBufferBuilder builder, int indptrTypeOffset) { builder.addOffset(0, indptrTypeOffset, 0); }
+ public static void addIndptrBuffers(FlatBufferBuilder builder, int indptrBuffersOffset) { builder.addOffset(1, indptrBuffersOffset, 0); }
+ public static void startIndptrBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); }
+ public static void addIndicesType(FlatBufferBuilder builder, int indicesTypeOffset) { builder.addOffset(2, indicesTypeOffset, 0); }
+ public static void addIndicesBuffers(FlatBufferBuilder builder, int indicesBuffersOffset) { builder.addOffset(3, indicesBuffersOffset, 0); }
+ public static void startIndicesBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); }
+ public static void addAxisOrder(FlatBufferBuilder builder, int axisOrderOffset) { builder.addOffset(4, axisOrderOffset, 0); }
+ public static int createAxisOrderVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addInt(data[i]); return builder.endVector(); }
+ public static void startAxisOrderVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static int endSparseTensorIndexCSF(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ builder.required(o, 4); // indptrType
+ builder.required(o, 6); // indptrBuffers
+ builder.required(o, 8); // indicesType
+ builder.required(o, 10); // indicesBuffers
+ builder.required(o, 12); // axisOrder
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public SparseTensorIndexCSF get(int j) { return get(new SparseTensorIndexCSF(), j); }
+ public SparseTensorIndexCSF get(SparseTensorIndexCSF obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java
new file mode 100644
index 000000000..1285f2884
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
+ * (according to the physical memory layout). We used Struct_ here as
+ * Struct is a reserved word in Flatbuffers
+ */
+public final class Struct_ extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Struct_ getRootAsStruct_(ByteBuffer _bb) { return getRootAsStruct_(_bb, new Struct_()); }
+ public static Struct_ getRootAsStruct_(ByteBuffer _bb, Struct_ obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Struct_ __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+ public static void startStruct_(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endStruct_(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Struct_ get(int j) { return get(new Struct_(), j); }
+ public Struct_ get(Struct_ obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java
new file mode 100644
index 000000000..d4466bcf2
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+public final class Tensor extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Tensor getRootAsTensor(ByteBuffer _bb) { return getRootAsTensor(_bb, new Tensor()); }
+ public static Tensor getRootAsTensor(ByteBuffer _bb, Tensor obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Tensor __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public byte typeType() { int o = __offset(4); return o != 0 ? bb.get(o + bb_pos) : 0; }
+ /**
+ * The type of data contained in a value cell. Currently only fixed-width
+ * value types are supported, no strings or nested types
+ */
+ public Table type(Table obj) { int o = __offset(6); return o != 0 ? __union(obj, o + bb_pos) : null; }
+ /**
+ * The dimensions of the tensor, optionally named
+ */
+ public org.apache.arrow.flatbuf.TensorDim shape(int j) { return shape(new org.apache.arrow.flatbuf.TensorDim(), j); }
+ public org.apache.arrow.flatbuf.TensorDim shape(org.apache.arrow.flatbuf.TensorDim obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; }
+ public int shapeLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; }
+ public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector() { return shapeVector(new org.apache.arrow.flatbuf.TensorDim.Vector()); }
+ public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector(org.apache.arrow.flatbuf.TensorDim.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; }
+ /**
+ * Non-negative byte offsets to advance one value cell along each dimension
+ * If omitted, default to row-major order (C-like).
+ */
+ public long strides(int j) { int o = __offset(10); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; }
+ public int stridesLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; }
+ public LongVector stridesVector() { return stridesVector(new LongVector()); }
+ public LongVector stridesVector(LongVector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+ public ByteBuffer stridesAsByteBuffer() { return __vector_as_bytebuffer(10, 8); }
+ public ByteBuffer stridesInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 10, 8); }
+ /**
+ * The location and size of the tensor's data
+ */
+ public org.apache.arrow.flatbuf.Buffer data() { return data(new org.apache.arrow.flatbuf.Buffer()); }
+ public org.apache.arrow.flatbuf.Buffer data(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(12); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; }
+
+ public static void startTensor(FlatBufferBuilder builder) { builder.startTable(5); }
+ public static void addTypeType(FlatBufferBuilder builder, byte typeType) { builder.addByte(0, typeType, 0); }
+ public static void addType(FlatBufferBuilder builder, int typeOffset) { builder.addOffset(1, typeOffset, 0); }
+ public static void addShape(FlatBufferBuilder builder, int shapeOffset) { builder.addOffset(2, shapeOffset, 0); }
+ public static int createShapeVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); }
+ public static void startShapeVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static void addStrides(FlatBufferBuilder builder, int stridesOffset) { builder.addOffset(3, stridesOffset, 0); }
+ public static int createStridesVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); }
+ public static void startStridesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); }
+ public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addStruct(4, dataOffset, 0); }
+ public static int endTensor(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ builder.required(o, 6); // type
+ builder.required(o, 8); // shape
+ builder.required(o, 12); // data
+ return o;
+ }
+ public static void finishTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); }
+ public static void finishSizePrefixedTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Tensor get(int j) { return get(new Tensor(), j); }
+ public Tensor get(Tensor obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java
new file mode 100644
index 000000000..fad8caacd
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * ----------------------------------------------------------------------
+ * Data structures for dense tensors
+ * Shape data for a single axis in a tensor
+ */
+public final class TensorDim extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static TensorDim getRootAsTensorDim(ByteBuffer _bb) { return getRootAsTensorDim(_bb, new TensorDim()); }
+ public static TensorDim getRootAsTensorDim(ByteBuffer _bb, TensorDim obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public TensorDim __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ /**
+ * Length of dimension
+ */
+ public long size() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; }
+ /**
+ * Name of the dimension, optional
+ */
+ public String name() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
+ public ByteBuffer nameAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
+ public ByteBuffer nameInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); }
+
+ public static int createTensorDim(FlatBufferBuilder builder,
+ long size,
+ int nameOffset) {
+ builder.startTable(2);
+ TensorDim.addSize(builder, size);
+ TensorDim.addName(builder, nameOffset);
+ return TensorDim.endTensorDim(builder);
+ }
+
+ public static void startTensorDim(FlatBufferBuilder builder) { builder.startTable(2); }
+ public static void addSize(FlatBufferBuilder builder, long size) { builder.addLong(0, size, 0L); }
+ public static void addName(FlatBufferBuilder builder, int nameOffset) { builder.addOffset(1, nameOffset, 0); }
+ public static int endTensorDim(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public TensorDim get(int j) { return get(new TensorDim(), j); }
+ public TensorDim get(TensorDim obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
new file mode 100644
index 000000000..596d403a3
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Time type. The physical storage type depends on the unit
+ * - SECOND and MILLISECOND: 32 bits
+ * - MICROSECOND and NANOSECOND: 64 bits
+ */
+public final class Time extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Time getRootAsTime(ByteBuffer _bb) { return getRootAsTime(_bb, new Time()); }
+ public static Time getRootAsTime(ByteBuffer _bb, Time obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Time __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 1; }
+ public int bitWidth() { int o = __offset(6); return o != 0 ? bb.getInt(o + bb_pos) : 32; }
+
+ public static int createTime(FlatBufferBuilder builder,
+ short unit,
+ int bitWidth) {
+ builder.startTable(2);
+ Time.addBitWidth(builder, bitWidth);
+ Time.addUnit(builder, unit);
+ return Time.endTime(builder);
+ }
+
+ public static void startTime(FlatBufferBuilder builder) { builder.startTable(2); }
+ public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); }
+ public static void addBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.addInt(1, bitWidth, 32); }
+ public static int endTime(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Time get(int j) { return get(new Time(), j); }
+ public Time get(Time obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java
new file mode 100644
index 000000000..828e44c13
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class TimeUnit {
+ private TimeUnit() { }
+ public static final short SECOND = 0;
+ public static final short MILLISECOND = 1;
+ public static final short MICROSECOND = 2;
+ public static final short NANOSECOND = 3;
+
+ public static final String[] names = { "SECOND", "MILLISECOND", "MICROSECOND", "NANOSECOND", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
new file mode 100644
index 000000000..041452607
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding
+ * leap seconds, as a 64-bit integer. Note that UNIX time does not include
+ * leap seconds.
+ *
+ * Date & time libraries often have multiple different data types for temporal
+ * data. In order to ease interoperability between different implementations the
+ * Arrow project has some recommendations for encoding these types into a Timestamp
+ * column.
+ *
+ * An "instant" represents a single moment in time that has no meaningful time zone
+ * or the time zone is unknown. A column of instants can also contain values from
+ * multiple time zones. To encode an instant set the timezone string to "UTC".
+ *
+ * A "zoned date-time" represents a single moment in time that has a meaningful
+ * reference time zone. To encode a zoned date-time as a Timestamp set the timezone
+ * string to the name of the timezone. There is some ambiguity between an instant
+ * and a zoned date-time with the UTC time zone. Both of these are stored the same.
+ * Typically, this distinction does not matter. If it does, then an application should
+ * use custom metadata or an extension type to distinguish between the two cases.
+ *
+ * An "offset date-time" represents a single moment in time combined with a meaningful
+ * offset from UTC. To encode an offset date-time as a Timestamp set the timezone string
+ * to the numeric time zone offset string (e.g. "+03:00").
+ *
+ * A "local date-time" does not represent a single moment in time. It represents a wall
+ * clock time combined with a date. Because of daylight savings time there may multiple
+ * instants that correspond to a single local date-time in any given time zone. A
+ * local date-time is often stored as a struct or a Date32/Time64 pair. However, it can
+ * also be encoded into a Timestamp column. To do so the value should be the the time
+ * elapsed from the Unix epoch so that a wall clock in UTC would display the desired time.
+ * The timezone string should be set to null or the empty string.
+ */
+public final class Timestamp extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Timestamp getRootAsTimestamp(ByteBuffer _bb) { return getRootAsTimestamp(_bb, new Timestamp()); }
+ public static Timestamp getRootAsTimestamp(ByteBuffer _bb, Timestamp obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Timestamp __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+ /**
+ * The time zone is a string indicating the name of a time zone, one of:
+ *
+ * * As used in the Olson time zone database (the "tz database" or
+ * "tzdata"), such as "America/New_York"
+ * * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
+ *
+ * Whether a timezone string is present indicates different semantics about
+ * the data:
+ *
+ * * If the time zone is null or an empty string, the data is a local date-time
+ * and does not represent a single moment in time. Instead it represents a wall clock
+ * time and care should be taken to avoid interpreting it semantically as an instant.
+ *
+ * * If the time zone is set to a valid value, values can be displayed as
+ * "localized" to that time zone, even though the underlying 64-bit
+ * integers are identical to the same data stored in UTC. Converting
+ * between time zones is a metadata-only operation and does not change the
+ * underlying values
+ */
+ public String timezone() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
+ public ByteBuffer timezoneAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
+ public ByteBuffer timezoneInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); }
+
+ public static int createTimestamp(FlatBufferBuilder builder,
+ short unit,
+ int timezoneOffset) {
+ builder.startTable(2);
+ Timestamp.addTimezone(builder, timezoneOffset);
+ Timestamp.addUnit(builder, unit);
+ return Timestamp.endTimestamp(builder);
+ }
+
+ public static void startTimestamp(FlatBufferBuilder builder) { builder.startTable(2); }
+ public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 0); }
+ public static void addTimezone(FlatBufferBuilder builder, int timezoneOffset) { builder.addOffset(1, timezoneOffset, 0); }
+ public static int endTimestamp(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Timestamp get(int j) { return get(new Timestamp(), j); }
+ public Timestamp get(Timestamp obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
new file mode 100644
index 000000000..5f1a550cf
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+/**
+ * ----------------------------------------------------------------------
+ * Top-level Type value, enabling extensible type-specific metadata. We can
+ * add new logical types to Type without breaking backwards compatibility
+ */
+public final class Type {
+ private Type() { }
+ public static final byte NONE = 0;
+ public static final byte Null = 1;
+ public static final byte Int = 2;
+ public static final byte FloatingPoint = 3;
+ public static final byte Binary = 4;
+ public static final byte Utf8 = 5;
+ public static final byte Bool = 6;
+ public static final byte Decimal = 7;
+ public static final byte Date = 8;
+ public static final byte Time = 9;
+ public static final byte Timestamp = 10;
+ public static final byte Interval = 11;
+ public static final byte List = 12;
+ public static final byte Struct_ = 13;
+ public static final byte Union = 14;
+ public static final byte FixedSizeBinary = 15;
+ public static final byte FixedSizeList = 16;
+ public static final byte Map = 17;
+ public static final byte Duration = 18;
+ public static final byte LargeBinary = 19;
+ public static final byte LargeUtf8 = 20;
+ public static final byte LargeList = 21;
+
+ public static final String[] names = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", "Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java
new file mode 100644
index 000000000..7e2822434
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * A union is a complex type with children in Field
+ * By default ids in the type vector refer to the offsets in the children
+ * optionally typeIds provides an indirection between the child offset and the type id
+ * for each child `typeIds[offset]` is the id used in the type vector
+ */
+public final class Union extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Union getRootAsUnion(ByteBuffer _bb) { return getRootAsUnion(_bb, new Union()); }
+ public static Union getRootAsUnion(ByteBuffer _bb, Union obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Union __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+ public short mode() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; }
+ public int typeIds(int j) { int o = __offset(6); return o != 0 ? bb.getInt(__vector(o) + j * 4) : 0; }
+ public int typeIdsLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; }
+ public IntVector typeIdsVector() { return typeIdsVector(new IntVector()); }
+ public IntVector typeIdsVector(IntVector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+ public ByteBuffer typeIdsAsByteBuffer() { return __vector_as_bytebuffer(6, 4); }
+ public ByteBuffer typeIdsInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 4); }
+
+ public static int createUnion(FlatBufferBuilder builder,
+ short mode,
+ int typeIdsOffset) {
+ builder.startTable(2);
+ Union.addTypeIds(builder, typeIdsOffset);
+ Union.addMode(builder, mode);
+ return Union.endUnion(builder);
+ }
+
+ public static void startUnion(FlatBufferBuilder builder) { builder.startTable(2); }
+ public static void addMode(FlatBufferBuilder builder, short mode) { builder.addShort(0, mode, 0); }
+ public static void addTypeIds(FlatBufferBuilder builder, int typeIdsOffset) { builder.addOffset(1, typeIdsOffset, 0); }
+ public static int createTypeIdsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addInt(data[i]); return builder.endVector(); }
+ public static void startTypeIdsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); }
+ public static int endUnion(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Union get(int j) { return get(new Union(), j); }
+ public Union get(Union obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java
new file mode 100644
index 000000000..23a6013f8
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+public final class UnionMode {
+ private UnionMode() { }
+ public static final short Sparse = 0;
+ public static final short Dense = 1;
+
+ public static final String[] names = { "Sparse", "Dense", };
+
+ public static String name(int e) { return names[e]; }
+}
+
diff --git a/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java
new file mode 100644
index 000000000..d77fe205f
--- /dev/null
+++ b/src/arrow/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// automatically generated by the FlatBuffers compiler, do not modify
+
+package org.apache.arrow.flatbuf;
+
+import java.nio.*;
+import java.lang.*;
+import java.util.*;
+import com.google.flatbuffers.*;
+
+@SuppressWarnings("unused")
+/**
+ * Unicode with UTF-8 encoding
+ */
+public final class Utf8 extends Table {
+ public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
+ public static Utf8 getRootAsUtf8(ByteBuffer _bb) { return getRootAsUtf8(_bb, new Utf8()); }
+ public static Utf8 getRootAsUtf8(ByteBuffer _bb, Utf8 obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
+ public Utf8 __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+
+
+ public static void startUtf8(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endUtf8(FlatBufferBuilder builder) {
+ int o = builder.endTable();
+ return o;
+ }
+
+ public static final class Vector extends BaseVector {
+ public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
+
+ public Utf8 get(int j) { return get(new Utf8(), j); }
+ public Utf8 get(Utf8 obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ }
+}
+
diff --git a/src/arrow/java/gandiva/CMakeLists.txt b/src/arrow/java/gandiva/CMakeLists.txt
new file mode 100644
index 000000000..5010daf79
--- /dev/null
+++ b/src/arrow/java/gandiva/CMakeLists.txt
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+project(gandiva_java)
+
+# Find java/jni
+include(FindJava)
+include(UseJava)
+include(FindJNI)
+
+message("generating headers to ${JNI_HEADERS_DIR}/jni")
+
+# generate_native_headers is available only from java8
+# centos5 does not have java8 images, so supporting java 7 too.
+# unfortunately create_javah does not work in java8 correctly.
+if(ARROW_GANDIVA_JAVA7)
+ add_jar(gandiva_java
+ src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
+ src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
+ src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java
+ src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java)
+
+ create_javah(TARGET gandiva_jni_headers
+ CLASSES org.apache.arrow.gandiva.evaluator.ConfigurationBuilder
+ org.apache.arrow.gandiva.evaluator.JniWrapper
+ org.apache.arrow.gandiva.evaluator.ExpressionRegistryJniHelper
+ org.apache.arrow.gandiva.exceptions.GandivaException
+ DEPENDS gandiva_java
+ CLASSPATH gandiva_java
+ OUTPUT_DIR ${JNI_HEADERS_DIR}/jni)
+else()
+ add_jar(gandiva_java
+ src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
+ src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
+ src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java
+ src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java
+ GENERATE_NATIVE_HEADERS
+ gandiva_jni_headers
+ DESTINATION
+ ${JNI_HEADERS_DIR}/jni)
+endif()
diff --git a/src/arrow/java/gandiva/README.md b/src/arrow/java/gandiva/README.md
new file mode 100644
index 000000000..22a292eaf
--- /dev/null
+++ b/src/arrow/java/gandiva/README.md
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+# Gandiva Java
+
+## Setup Build Environment
+
+install:
+ - java 7 or later
+ - maven 3.3 or later
+
+## Building and running tests
+
+```
+cd java
+mvn install -Dgandiva.cpp.build.dir=<path_to_cpp_artifact_directory>
+```
diff --git a/src/arrow/java/gandiva/pom.xml b/src/arrow/java/gandiva/pom.xml
new file mode 100644
index 000000000..81caf12f5
--- /dev/null
+++ b/src/arrow/java/gandiva/pom.xml
@@ -0,0 +1,153 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ </parent>
+
+ <groupId>org.apache.arrow.gandiva</groupId>
+ <artifactId>arrow-gandiva</artifactId>
+ <packaging>jar</packaging>
+ <name>Arrow Gandiva</name>
+ <description>Java wrappers around the native Gandiva SQL expression compiler.</description>
+ <properties>
+ <maven.compiler.source>1.8</maven.compiler.source>
+ <maven.compiler.target>1.8</maven.compiler.target>
+ <protobuf.version>2.5.0</protobuf.version>
+ <checkstyle.failOnViolation>true</checkstyle.failOnViolation>
+ <arrow.cpp.build.dir>../../../cpp/release-build</arrow.cpp.build.dir>
+ </properties>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ <version>${protobuf.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ </dependencies>
+ <profiles>
+ <profile>
+ <id>release</id>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ <version>2.2.1</version>
+ <executions>
+ <execution>
+ <id>attach-sources</id>
+ <goals>
+ <goal>jar-no-fork</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>2.9.1</version>
+ <executions>
+ <execution>
+ <id>attach-javadocs</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-gpg-plugin</artifactId>
+ <version>1.5</version>
+ <executions>
+ <execution>
+ <id>sign-artifacts</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>sign</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+ <build>
+ <resources>
+ <resource>
+ <directory>${arrow.cpp.build.dir}</directory>
+ <includes>
+ <include>**/gandiva_jni.*</include>
+ <include>**/libgandiva_jni.*</include>
+ </includes>
+ </resource>
+ </resources>
+
+ <extensions>
+ <extension>
+ <groupId>kr.motd.maven</groupId>
+ <artifactId>os-maven-plugin</artifactId>
+ <version>1.5.0.Final</version>
+ </extension>
+ </extensions>
+ <plugins>
+ <plugin>
+ <groupId>org.xolstice.maven.plugins</groupId>
+ <artifactId>protobuf-maven-plugin</artifactId>
+ <version>0.5.1</version>
+ <configuration>
+ <protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}
+ </protocArtifact>
+ <protoSourceRoot>../../cpp/src/gandiva/proto</protoSourceRoot>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>compile</goal>
+ <goal>test-compile</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
new file mode 100644
index 000000000..e903b4e87
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import java.util.Objects;
+
+/**
+ * Used to construct gandiva configuration objects.
+ */
+public class ConfigurationBuilder {
+
+ public long buildConfigInstance(ConfigOptions configOptions) {
+ return buildConfigInstance(configOptions.optimize, configOptions.targetCPU);
+ }
+
+ private native long buildConfigInstance(boolean optimize, boolean detectHostCPU);
+
+ public native void releaseConfigInstance(long configId);
+
+ /**
+ * ConfigOptions contains the configuration parameters to provide to gandiva.
+ */
+ public static class ConfigOptions {
+ private boolean optimize = true;
+ private boolean targetCPU = true;
+
+ public static ConfigOptions getDefault() {
+ return new ConfigOptions();
+ }
+
+ public ConfigOptions() {}
+
+ public ConfigOptions withOptimize(boolean optimize) {
+ this.optimize = optimize;
+ return this;
+ }
+
+ public ConfigOptions withTargetCPU(boolean targetCPU) {
+ this.targetCPU = targetCPU;
+ return this;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(optimize, targetCPU);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof ConfigOptions)) {
+ return false;
+ }
+ return this.optimize == ((ConfigOptions) obj).optimize &&
+ this.targetCPU == ((ConfigOptions) obj).targetCPU;
+ }
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java
new file mode 100644
index 000000000..e0c072cfb
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+
+/**
+ * Utility methods for working with {@link Decimal} values.
+ */
+public class DecimalTypeUtil {
+ private DecimalTypeUtil() {}
+
+ /**
+ * Enum for supported mathematical operations.
+ */
+ public enum OperationType {
+ ADD,
+ SUBTRACT,
+ MULTIPLY,
+ DIVIDE,
+ MOD
+ }
+
+ private static final int MIN_ADJUSTED_SCALE = 6;
+ /// The maximum precision representable by a 16-byte decimal
+ private static final int MAX_PRECISION = 38;
+
+ /**
+ * Determines the scale and precision of applying the given operation to the operands.
+ */
+ public static Decimal getResultTypeForOperation(OperationType operation, Decimal operand1, Decimal
+ operand2) {
+ int s1 = operand1.getScale();
+ int s2 = operand2.getScale();
+ int p1 = operand1.getPrecision();
+ int p2 = operand2.getPrecision();
+ int resultScale = 0;
+ int resultPrecision = 0;
+ switch (operation) {
+ case ADD:
+ case SUBTRACT:
+ resultScale = Math.max(operand1.getScale(), operand2.getScale());
+ resultPrecision = resultScale + Math.max(operand1.getPrecision() - operand1.getScale(),
+ operand2.getPrecision() - operand2.getScale()) + 1;
+ break;
+ case MULTIPLY:
+ resultScale = s1 + s2;
+ resultPrecision = p1 + p2 + 1;
+ break;
+ case DIVIDE:
+ resultScale =
+ Math.max(MIN_ADJUSTED_SCALE, operand1.getScale() + operand2.getPrecision() + 1);
+ resultPrecision =
+ operand1.getPrecision() - operand1.getScale() + operand2.getScale() + resultScale;
+ break;
+ case MOD:
+ resultScale = Math.max(operand1.getScale(), operand2.getScale());
+ resultPrecision = Math.min(operand1.getPrecision() - operand1.getScale(),
+ operand2.getPrecision() - operand2.getScale()) +
+ resultScale;
+ break;
+ default:
+ throw new RuntimeException("Needs support");
+ }
+ return adjustScaleIfNeeded(resultPrecision, resultScale);
+ }
+
+ private static Decimal adjustScaleIfNeeded(int precision, int scale) {
+ if (precision > MAX_PRECISION) {
+ int minScale = Math.min(scale, MIN_ADJUSTED_SCALE);
+ int delta = precision - MAX_PRECISION;
+ precision = MAX_PRECISION;
+ scale = Math.max(scale - delta, minScale);
+ }
+ return new Decimal(precision, scale, 128);
+ }
+
+}
+
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java
new file mode 100644
index 000000000..0155af082
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import java.util.List;
+import java.util.Set;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.gandiva.ipc.GandivaTypes.ExtGandivaType;
+import org.apache.arrow.gandiva.ipc.GandivaTypes.GandivaDataTypes;
+import org.apache.arrow.gandiva.ipc.GandivaTypes.GandivaFunctions;
+import org.apache.arrow.gandiva.ipc.GandivaTypes.GandivaType;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import com.google.protobuf.InvalidProtocolBufferException;
+
+/**
+ * Used to get the functions and data types supported by
+ * Gandiva.
+ * All types are in Arrow namespace.
+ */
+public class ExpressionRegistry {
+
+ private static final int BIT_WIDTH8 = 8;
+ private static final int BIT_WIDTH_16 = 16;
+ private static final int BIT_WIDTH_32 = 32;
+ private static final int BIT_WIDTH_64 = 64;
+ private static final boolean IS_SIGNED_FALSE = false;
+ private static final boolean IS_SIGNED_TRUE = true;
+
+ private final Set<ArrowType> supportedTypes;
+ private final Set<FunctionSignature> functionSignatures;
+
+ private static volatile ExpressionRegistry INSTANCE;
+
+ private ExpressionRegistry(Set<ArrowType> supportedTypes,
+ Set<FunctionSignature> functionSignatures) {
+ this.supportedTypes = supportedTypes;
+ this.functionSignatures = functionSignatures;
+ }
+
+ /**
+ * Returns a singleton instance of the class.
+ * @return singleton instance
+ * @throws GandivaException if error in Gandiva Library integration.
+ */
+ public static ExpressionRegistry getInstance() throws GandivaException {
+ if (INSTANCE == null) {
+ synchronized (ExpressionRegistry.class) {
+ if (INSTANCE == null) {
+ // ensure library is setup.
+ JniLoader.getInstance();
+ Set<ArrowType> typesFromGandiva = getSupportedTypesFromGandiva();
+ Set<FunctionSignature> functionsFromGandiva = getSupportedFunctionsFromGandiva();
+ INSTANCE = new ExpressionRegistry(typesFromGandiva, functionsFromGandiva);
+ }
+ }
+ }
+ return INSTANCE;
+ }
+
+ public Set<FunctionSignature> getSupportedFunctions() {
+ return functionSignatures;
+ }
+
+ public Set<ArrowType> getSupportedTypes() {
+ return supportedTypes;
+ }
+
+ private static Set<ArrowType> getSupportedTypesFromGandiva() throws GandivaException {
+ Set<ArrowType> supportedTypes = Sets.newHashSet();
+ try {
+ byte[] gandivaSupportedDataTypes = new ExpressionRegistryJniHelper()
+ .getGandivaSupportedDataTypes();
+ GandivaDataTypes gandivaDataTypes = GandivaDataTypes.parseFrom(gandivaSupportedDataTypes);
+ for (ExtGandivaType type : gandivaDataTypes.getDataTypeList()) {
+ supportedTypes.add(getArrowType(type));
+ }
+ } catch (InvalidProtocolBufferException invalidProtException) {
+ throw new GandivaException("Could not get supported types.", invalidProtException);
+ }
+ return supportedTypes;
+ }
+
+ private static Set<FunctionSignature> getSupportedFunctionsFromGandiva() throws
+ GandivaException {
+ Set<FunctionSignature> supportedTypes = Sets.newHashSet();
+ try {
+ byte[] gandivaSupportedFunctions = new ExpressionRegistryJniHelper()
+ .getGandivaSupportedFunctions();
+ GandivaFunctions gandivaFunctions = GandivaFunctions.parseFrom(gandivaSupportedFunctions);
+ for (GandivaTypes.FunctionSignature protoFunctionSignature
+ : gandivaFunctions.getFunctionList()) {
+
+ String functionName = protoFunctionSignature.getName();
+ ArrowType returnType = getArrowType(protoFunctionSignature.getReturnType());
+ List<ArrowType> paramTypes = Lists.newArrayList();
+ for (ExtGandivaType type : protoFunctionSignature.getParamTypesList()) {
+ paramTypes.add(getArrowType(type));
+ }
+ FunctionSignature functionSignature = new FunctionSignature(functionName,
+ returnType, paramTypes);
+ supportedTypes.add(functionSignature);
+ }
+ } catch (InvalidProtocolBufferException invalidProtException) {
+ throw new GandivaException("Could not get supported functions.", invalidProtException);
+ }
+ return supportedTypes;
+ }
+
+ private static ArrowType getArrowType(ExtGandivaType type) {
+ switch (type.getType().getNumber()) {
+ case GandivaType.BOOL_VALUE:
+ return ArrowType.Bool.INSTANCE;
+ case GandivaType.UINT8_VALUE:
+ return new ArrowType.Int(BIT_WIDTH8, IS_SIGNED_FALSE);
+ case GandivaType.INT8_VALUE:
+ return new ArrowType.Int(BIT_WIDTH8, IS_SIGNED_TRUE);
+ case GandivaType.UINT16_VALUE:
+ return new ArrowType.Int(BIT_WIDTH_16, IS_SIGNED_FALSE);
+ case GandivaType.INT16_VALUE:
+ return new ArrowType.Int(BIT_WIDTH_16, IS_SIGNED_TRUE);
+ case GandivaType.UINT32_VALUE:
+ return new ArrowType.Int(BIT_WIDTH_32, IS_SIGNED_FALSE);
+ case GandivaType.INT32_VALUE:
+ return new ArrowType.Int(BIT_WIDTH_32, IS_SIGNED_TRUE);
+ case GandivaType.UINT64_VALUE:
+ return new ArrowType.Int(BIT_WIDTH_64, IS_SIGNED_FALSE);
+ case GandivaType.INT64_VALUE:
+ return new ArrowType.Int(BIT_WIDTH_64, IS_SIGNED_TRUE);
+ case GandivaType.HALF_FLOAT_VALUE:
+ return new ArrowType.FloatingPoint(FloatingPointPrecision.HALF);
+ case GandivaType.FLOAT_VALUE:
+ return new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
+ case GandivaType.DOUBLE_VALUE:
+ return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
+ case GandivaType.UTF8_VALUE:
+ return new ArrowType.Utf8();
+ case GandivaType.BINARY_VALUE:
+ return new ArrowType.Binary();
+ case GandivaType.DATE32_VALUE:
+ return new ArrowType.Date(DateUnit.DAY);
+ case GandivaType.DATE64_VALUE:
+ return new ArrowType.Date(DateUnit.MILLISECOND);
+ case GandivaType.TIMESTAMP_VALUE:
+ return new ArrowType.Timestamp(mapArrowTimeUnit(type.getTimeUnit()), null);
+ case GandivaType.TIME32_VALUE:
+ return new ArrowType.Time(mapArrowTimeUnit(type.getTimeUnit()),
+ BIT_WIDTH_32);
+ case GandivaType.TIME64_VALUE:
+ return new ArrowType.Time(mapArrowTimeUnit(type.getTimeUnit()),
+ BIT_WIDTH_64);
+ case GandivaType.NONE_VALUE:
+ return new ArrowType.Null();
+ case GandivaType.DECIMAL_VALUE:
+ return new ArrowType.Decimal(0, 0, 128);
+ case GandivaType.INTERVAL_VALUE:
+ return new ArrowType.Interval(mapArrowIntervalUnit(type.getIntervalType()));
+ case GandivaType.FIXED_SIZE_BINARY_VALUE:
+ case GandivaType.MAP_VALUE:
+ case GandivaType.DICTIONARY_VALUE:
+ case GandivaType.LIST_VALUE:
+ case GandivaType.STRUCT_VALUE:
+ case GandivaType.UNION_VALUE:
+ default:
+ assert false;
+ }
+ return null;
+ }
+
+ private static TimeUnit mapArrowTimeUnit(GandivaTypes.TimeUnit timeUnit) {
+ switch (timeUnit.getNumber()) {
+ case GandivaTypes.TimeUnit.MICROSEC_VALUE:
+ return TimeUnit.MICROSECOND;
+ case GandivaTypes.TimeUnit.MILLISEC_VALUE:
+ return TimeUnit.MILLISECOND;
+ case GandivaTypes.TimeUnit.NANOSEC_VALUE:
+ return TimeUnit.NANOSECOND;
+ case GandivaTypes.TimeUnit.SEC_VALUE:
+ return TimeUnit.SECOND;
+ default:
+ return null;
+ }
+ }
+
+ private static IntervalUnit mapArrowIntervalUnit(GandivaTypes.IntervalType intervalType) {
+ switch (intervalType.getNumber()) {
+ case GandivaTypes.IntervalType.YEAR_MONTH_VALUE:
+ return IntervalUnit.YEAR_MONTH;
+ case GandivaTypes.IntervalType.DAY_TIME_VALUE:
+ return IntervalUnit.DAY_TIME;
+ default:
+ return null;
+ }
+ }
+
+}
+
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java
new file mode 100644
index 000000000..86c1eaaed
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+/**
+ * JNI Adapter used to get supported types and functions
+ * from Gandiva.
+ */
+class ExpressionRegistryJniHelper {
+
+ native byte[] getGandivaSupportedDataTypes();
+
+ native byte[] getGandivaSupportedFunctions();
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java
new file mode 100644
index 000000000..010d644d1
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.gandiva.exceptions.EvaluatorClosedException;
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.expression.ArrowTypeHelper;
+import org.apache.arrow.gandiva.expression.Condition;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.ipc.message.ArrowBuffer;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a mechanism to filter a RecordBatch by evaluating a condition expression.
+ * Follow these steps to use this class: 1) Use the static method make() to create an instance of
+ * this class that evaluates a condition. 2) Invoke the method evaluate() to evaluate the filter
+ * against a RecordBatch 3) Invoke close() to release resources
+ */
+public class Filter {
+
+ private static final Logger logger = LoggerFactory.getLogger(Filter.class);
+
+ private final JniWrapper wrapper;
+ private final long moduleId;
+ private final Schema schema;
+ private boolean closed;
+
+ private Filter(JniWrapper wrapper, long moduleId, Schema schema) {
+ this.wrapper = wrapper;
+ this.moduleId = moduleId;
+ this.schema = schema;
+ this.closed = false;
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
+ * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used to
+ * create the TreeNodes
+ * @param condition condition to be evaluated against data
+ * @return A native filter object that can be used to invoke on a RecordBatch
+ */
+ public static Filter make(Schema schema, Condition condition) throws GandivaException {
+ return make(schema, condition, JniLoader.getDefaultConfiguration());
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
+ * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used to
+ * create the TreeNodes
+ * @param condition condition to be evaluated against data
+ * @param configOptions ConfigOptions parameter
+ * @return A native filter object that can be used to invoke on a RecordBatch
+ */
+ public static Filter make(Schema schema, Condition condition, ConfigurationBuilder.ConfigOptions configOptions)
+ throws GandivaException {
+ return make(schema, condition, JniLoader.getConfiguration(configOptions));
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
+ * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used to
+ * create the TreeNodes
+ * @param condition condition to be evaluated against data
+ * @param optimize Flag to choose if the generated llvm code is to be optimized
+ * @return A native filter object that can be used to invoke on a RecordBatch
+ */
+ @Deprecated
+ public static Filter make(Schema schema, Condition condition, boolean optimize) throws GandivaException {
+ return make(schema, condition, JniLoader.getConfiguration((new ConfigurationBuilder.ConfigOptions())
+ .withOptimize(optimize)));
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
+ * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used to
+ * create the TreeNodes
+ * @param condition condition to be evaluated against data
+ * @param configurationId Custom configuration created through config builder.
+ * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
+ */
+ public static Filter make(Schema schema, Condition condition, long configurationId)
+ throws GandivaException {
+ // Invoke the JNI layer to create the LLVM module representing the filter.
+ GandivaTypes.Condition conditionBuf = condition.toProtobuf();
+ GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema);
+ JniWrapper wrapper = JniLoader.getInstance().getWrapper();
+ long moduleId = wrapper.buildFilter(schemaBuf.toByteArray(),
+ conditionBuf.toByteArray(), configurationId);
+ logger.debug("Created module for the filter with id {}", moduleId);
+ return new Filter(wrapper, moduleId, schema);
+ }
+
+ /**
+ * Invoke this function to evaluate a filter against a recordBatch.
+ *
+ * @param recordBatch Record batch including the data
+ * @param selectionVector Result of applying the filter on the data
+ */
+ public void evaluate(ArrowRecordBatch recordBatch, SelectionVector selectionVector)
+ throws GandivaException {
+ evaluate(recordBatch.getLength(), recordBatch.getBuffers(), recordBatch.getBuffersLayout(),
+ selectionVector);
+ }
+
+ /**
+ * Invoke this function to evaluate filter against a set of arrow buffers. (this is an optimised
+ * version that skips taking references).
+ *
+ * @param numRows number of rows.
+ * @param buffers List of input arrow buffers
+ * @param selectionVector Result of applying the filter on the data
+ */
+ public void evaluate(int numRows, List<ArrowBuf> buffers,
+ SelectionVector selectionVector) throws GandivaException {
+ List<ArrowBuffer> buffersLayout = new ArrayList<>();
+ long offset = 0;
+ for (ArrowBuf arrowBuf : buffers) {
+ long size = arrowBuf.readableBytes();
+ buffersLayout.add(new ArrowBuffer(offset, size));
+ offset += size;
+ }
+ evaluate(numRows, buffers, buffersLayout, selectionVector);
+ }
+
+ private void evaluate(int numRows, List<ArrowBuf> buffers, List<ArrowBuffer> buffersLayout,
+ SelectionVector selectionVector) throws GandivaException {
+ if (this.closed) {
+ throw new EvaluatorClosedException();
+ }
+ if (selectionVector.getMaxRecords() < numRows) {
+ logger.error("selectionVector has capacity for " + selectionVector.getMaxRecords() +
+ " rows, minimum required " + numRows);
+ throw new GandivaException("SelectionVector too small");
+ }
+
+ long[] bufAddrs = new long[buffers.size()];
+ long[] bufSizes = new long[buffers.size()];
+
+ int idx = 0;
+ for (ArrowBuf buf : buffers) {
+ bufAddrs[idx++] = buf.memoryAddress();
+ }
+
+ idx = 0;
+ for (ArrowBuffer bufLayout : buffersLayout) {
+ bufSizes[idx++] = bufLayout.getSize();
+ }
+
+ int numRecords = wrapper.evaluateFilter(this.moduleId, numRows,
+ bufAddrs, bufSizes,
+ selectionVector.getType().getNumber(),
+ selectionVector.getBuffer().memoryAddress(), selectionVector.getBuffer().capacity());
+ if (numRecords >= 0) {
+ selectionVector.setRecordCount(numRecords);
+ }
+ }
+
+ /**
+ * Closes the LLVM module representing this filter.
+ */
+ public void close() throws GandivaException {
+ if (this.closed) {
+ return;
+ }
+
+ wrapper.closeFilter(this.moduleId);
+ this.closed = true;
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java
new file mode 100644
index 000000000..d01881843
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import java.util.List;
+
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+import com.google.common.base.MoreObjects;
+import com.google.common.base.Objects;
+
+/**
+ * POJO to define a function signature.
+ */
+public class FunctionSignature {
+ private final String name;
+ private final ArrowType returnType;
+ private final List<ArrowType> paramTypes;
+
+ public ArrowType getReturnType() {
+ return returnType;
+ }
+
+ public List<ArrowType> getParamTypes() {
+ return paramTypes;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Ctor.
+ * @param name - name of the function.
+ * @param returnType - data type of return
+ * @param paramTypes - data type of input args.
+ */
+ public FunctionSignature(String name, ArrowType returnType, List<ArrowType> paramTypes) {
+ this.name = name;
+ this.returnType = returnType;
+ this.paramTypes = paramTypes;
+ }
+
+ /**
+ * Override equals.
+ * @param signature - signature to compare
+ * @return true if equal and false if not.
+ */
+ public boolean equals(Object signature) {
+ if (signature == null) {
+ return false;
+ }
+ if (getClass() != signature.getClass()) {
+ return false;
+ }
+ final FunctionSignature other = (FunctionSignature) signature;
+ return this.name.equalsIgnoreCase(other.name) &&
+ Objects.equal(this.returnType, other.returnType) &&
+ Objects.equal(this.paramTypes, other.paramTypes);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hashCode(this.name.toLowerCase(), this.returnType, this.paramTypes);
+ }
+
+ @Override
+ public String toString() {
+ return MoreObjects.toStringHelper(this)
+ .add("name ", name)
+ .add("return type ", returnType)
+ .add("param types ", paramTypes)
+ .toString();
+
+ }
+
+
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java
new file mode 100644
index 000000000..676956a34
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import static java.util.UUID.randomUUID;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+
+/**
+ * This class handles loading of the jni library, and acts as a bridge for the native functions.
+ */
+class JniLoader {
+ private static final String LIBRARY_NAME = "gandiva_jni";
+
+ private static volatile JniLoader INSTANCE;
+ private static volatile long defaultConfiguration = 0L;
+ private static final ConcurrentMap<ConfigurationBuilder.ConfigOptions, Long> configurationMap
+ = new ConcurrentHashMap<>();
+
+ private final JniWrapper wrapper;
+
+ private JniLoader() {
+ this.wrapper = new JniWrapper();
+ }
+
+ static JniLoader getInstance() throws GandivaException {
+ if (INSTANCE == null) {
+ synchronized (JniLoader.class) {
+ if (INSTANCE == null) {
+ INSTANCE = setupInstance();
+ }
+ }
+ }
+ return INSTANCE;
+ }
+
+ private static JniLoader setupInstance() throws GandivaException {
+ try {
+ String tempDir = System.getProperty("java.io.tmpdir");
+ loadGandivaLibraryFromJar(tempDir);
+ return new JniLoader();
+ } catch (IOException ioException) {
+ throw new GandivaException("unable to create native instance", ioException);
+ }
+ }
+
+ private static void loadGandivaLibraryFromJar(final String tmpDir)
+ throws IOException, GandivaException {
+ final String libraryToLoad = System.mapLibraryName(LIBRARY_NAME);
+ final File libraryFile = moveFileFromJarToTemp(tmpDir, libraryToLoad);
+ System.load(libraryFile.getAbsolutePath());
+ }
+
+
+ private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad)
+ throws IOException, GandivaException {
+ final File temp = setupFile(tmpDir, libraryToLoad);
+ try (final InputStream is = JniLoader.class.getClassLoader()
+ .getResourceAsStream(libraryToLoad)) {
+ if (is == null) {
+ throw new GandivaException(libraryToLoad + " was not found inside JAR.");
+ } else {
+ Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ }
+ }
+ return temp;
+ }
+
+ private static File setupFile(String tmpDir, String libraryToLoad)
+ throws IOException, GandivaException {
+ // accommodate multiple processes running with gandiva jar.
+ // length should be ok since uuid is only 36 characters.
+ final String randomizeFileName = libraryToLoad + randomUUID();
+ final File temp = new File(tmpDir, randomizeFileName);
+ if (temp.exists() && !temp.delete()) {
+ throw new GandivaException("File: " + temp.getAbsolutePath() +
+ " already exists and cannot be removed.");
+ }
+ if (!temp.createNewFile()) {
+ throw new GandivaException("File: " + temp.getAbsolutePath() +
+ " could not be created.");
+ }
+ temp.deleteOnExit();
+ return temp;
+ }
+
+ /**
+ * Returns the jni wrapper.
+ */
+ JniWrapper getWrapper() throws GandivaException {
+ return wrapper;
+ }
+
+ static long getConfiguration(ConfigurationBuilder.ConfigOptions configOptions) throws GandivaException {
+ if (!configurationMap.containsKey(configOptions)) {
+ synchronized (ConfigurationBuilder.class) {
+ if (!configurationMap.containsKey(configOptions)) {
+ JniLoader.getInstance(); // setup
+ long configInstance = new ConfigurationBuilder()
+ .buildConfigInstance(configOptions);
+ configurationMap.put(configOptions, configInstance);
+ if (ConfigurationBuilder.ConfigOptions.getDefault().equals(configOptions)) {
+ defaultConfiguration = configInstance;
+ }
+ return configInstance;
+ }
+ }
+ }
+ return configurationMap.get(configOptions);
+ }
+
+ /**
+ * Get the default configuration to invoke gandiva.
+ * @return default configuration
+ * @throws GandivaException if unable to get native builder instance.
+ */
+ static long getDefaultConfiguration() throws GandivaException {
+ if (defaultConfiguration == 0L) {
+ synchronized (ConfigurationBuilder.class) {
+ if (defaultConfiguration == 0L) {
+ JniLoader.getInstance(); // setup
+ ConfigurationBuilder.ConfigOptions defaultConfigOptons = ConfigurationBuilder.ConfigOptions.getDefault();
+ defaultConfiguration = new ConfigurationBuilder()
+ .buildConfigInstance(defaultConfigOptons);
+ configurationMap.put(defaultConfigOptons, defaultConfiguration);
+ }
+ }
+ }
+ return defaultConfiguration;
+ }
+
+ /**
+ * Remove the configuration.
+ */
+ static void removeConfiguration(ConfigurationBuilder.ConfigOptions configOptions) {
+ if (configurationMap.containsKey(configOptions)) {
+ synchronized (ConfigurationBuilder.class) {
+ if (configurationMap.containsKey(configOptions)) {
+ (new ConfigurationBuilder()).releaseConfigInstance(configurationMap.remove(configOptions));
+ if (configOptions.equals(ConfigurationBuilder.ConfigOptions.getDefault())) {
+ defaultConfiguration = 0;
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
new file mode 100644
index 000000000..520ef5f44
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+
+/**
+ * This class is implemented in JNI. This provides the Java interface
+ * to invoke functions in JNI.
+ * This file is used to generated the .h files required for jni. Avoid all
+ * external dependencies in this file.
+ */
+public class JniWrapper {
+
+ /**
+ * Generates the projector module to evaluate the expressions with
+ * custom configuration.
+ *
+ * @param schemaBuf The schema serialized as a protobuf. See Types.proto
+ * to see the protobuf specification
+ * @param exprListBuf The serialized protobuf of the expression vector. Each
+ * expression is created using TreeBuilder::MakeExpression.
+ * @param selectionVectorType type of selection vector
+ * @param configId Configuration to gandiva.
+ * @return A moduleId that is passed to the evaluateProjector() and closeProjector() methods
+ *
+ */
+ native long buildProjector(byte[] schemaBuf, byte[] exprListBuf,
+ int selectionVectorType,
+ long configId) throws GandivaException;
+
+ /**
+ * Evaluate the expressions represented by the moduleId on a record batch
+ * and store the output in ValueVectors. Throws an exception in case of errors
+ *
+ * @param expander VectorExpander object. Used for callbacks from cpp.
+ * @param moduleId moduleId representing expressions. Created using a call to
+ * buildNativeCode
+ * @param numRows Number of rows in the record batch
+ * @param bufAddrs An array of memory addresses. Each memory address points to
+ * a validity vector or a data vector (will add support for offset
+ * vectors later).
+ * @param bufSizes An array of buffer sizes. For each memory address in bufAddrs,
+ * the size of the buffer is present in bufSizes
+ * @param outAddrs An array of output buffers, including the validity and data
+ * addresses.
+ * @param outSizes The allocated size of the output buffers. On successful evaluation,
+ * the result is stored in the output buffers
+ */
+ native void evaluateProjector(Object expander, long moduleId, int numRows,
+ long[] bufAddrs, long[] bufSizes,
+ int selectionVectorType, int selectionVectorSize,
+ long selectionVectorBufferAddr, long selectionVectorBufferSize,
+ long[] outAddrs, long[] outSizes) throws GandivaException;
+
+ /**
+ * Closes the projector referenced by moduleId.
+ *
+ * @param moduleId moduleId that needs to be closed
+ */
+ native void closeProjector(long moduleId);
+
+ /**
+ * Generates the filter module to evaluate the condition expression with
+ * custom configuration.
+ *
+ * @param schemaBuf The schema serialized as a protobuf. See Types.proto
+ * to see the protobuf specification
+ * @param conditionBuf The serialized protobuf of the condition expression. Each
+ * expression is created using TreeBuilder::MakeCondition
+ * @param configId Configuration to gandiva.
+ * @return A moduleId that is passed to the evaluateFilter() and closeFilter() methods
+ *
+ */
+ native long buildFilter(byte[] schemaBuf, byte[] conditionBuf,
+ long configId) throws GandivaException;
+
+ /**
+ * Evaluate the filter represented by the moduleId on a record batch
+ * and store the output in buffer 'outAddr'. Throws an exception in case of errors
+ *
+ * @param moduleId moduleId representing expressions. Created using a call to
+ * buildNativeCode
+ * @param numRows Number of rows in the record batch
+ * @param bufAddrs An array of memory addresses. Each memory address points to
+ * a validity vector or a data vector (will add support for offset
+ * vectors later).
+ * @param bufSizes An array of buffer sizes. For each memory address in bufAddrs,
+ * the size of the buffer is present in bufSizes
+ * @param selectionVectorType type of selection vector
+ * @param outAddr output buffer, whose type is represented by selectionVectorType
+ * @param outSize The allocated size of the output buffer. On successful evaluation,
+ * the result is stored in the output buffer
+ */
+ native int evaluateFilter(long moduleId, int numRows, long[] bufAddrs, long[] bufSizes,
+ int selectionVectorType,
+ long outAddr, long outSize) throws GandivaException;
+
+ /**
+ * Closes the filter referenced by moduleId.
+ *
+ * @param moduleId moduleId that needs to be closed
+ */
+ native void closeFilter(long moduleId);
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java
new file mode 100644
index 000000000..471ddbced
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java
@@ -0,0 +1,364 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.gandiva.exceptions.EvaluatorClosedException;
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.exceptions.UnsupportedTypeException;
+import org.apache.arrow.gandiva.expression.ArrowTypeHelper;
+import org.apache.arrow.gandiva.expression.ExpressionTree;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.FixedWidthVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VariableWidthVector;
+import org.apache.arrow.vector.ipc.message.ArrowBuffer;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * This class provides a mechanism to evaluate a set of expressions against a RecordBatch.
+ * Follow these steps to use this class:
+ * 1) Use the static method make() to create an instance of this class that evaluates a
+ * set of expressions
+ * 2) Invoke the method evaluate() to evaluate these expressions against a RecordBatch
+ * 3) Invoke close() to release resources
+ */
+public class Projector {
+ private static final org.slf4j.Logger logger =
+ org.slf4j.LoggerFactory.getLogger(Projector.class);
+
+ private JniWrapper wrapper;
+ private final long moduleId;
+ private final Schema schema;
+ private final int numExprs;
+ private boolean closed;
+
+ private Projector(JniWrapper wrapper, long moduleId, Schema schema, int numExprs) {
+ this.wrapper = wrapper;
+ this.moduleId = moduleId;
+ this.schema = schema;
+ this.numExprs = numExprs;
+ this.closed = false;
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the list of project expressions.
+ * Invoke Projector::Evaluate() against a RecordBatch to evaluate the record batch
+ * against these projections.
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used
+ * to create the TreeNodes
+ * @param exprs List of expressions to be evaluated against data
+ *
+ * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
+ */
+ public static Projector make(Schema schema, List<ExpressionTree> exprs)
+ throws GandivaException {
+ return make(schema, exprs, SelectionVectorType.SV_NONE, JniLoader.getDefaultConfiguration());
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the list of project expressions.
+ * Invoke Projector::Evaluate() against a RecordBatch to evaluate the record batch
+ * against these projections.
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used
+ * to create the TreeNodes
+ * @param exprs List of expressions to be evaluated against data
+ * @param configOptions ConfigOptions parameter
+ *
+ * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
+ */
+ public static Projector make(Schema schema, List<ExpressionTree> exprs,
+ ConfigurationBuilder.ConfigOptions configOptions) throws GandivaException {
+ return make(schema, exprs, SelectionVectorType.SV_NONE, JniLoader.getConfiguration(configOptions));
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the list of project expressions.
+ * Invoke Projector::Evaluate() against a RecordBatch to evaluate the record batch
+ * against these projections.
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used
+ * to create the TreeNodes
+ * @param exprs List of expressions to be evaluated against data
+ * @param optimize Flag to choose if the generated llvm code is to be optimized
+ *
+ * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
+ */
+ @Deprecated
+ public static Projector make(Schema schema, List<ExpressionTree> exprs, boolean optimize)
+ throws GandivaException {
+ return make(schema, exprs, SelectionVectorType.SV_NONE,
+ JniLoader.getConfiguration((new ConfigurationBuilder.ConfigOptions()).withOptimize(optimize)));
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the list of project expressions.
+ * Invoke Projector::Evaluate() against a RecordBatch to evaluate the record batch
+ * against these projections.
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used
+ * to create the TreeNodes
+ * @param exprs List of expressions to be evaluated against data
+ * @param selectionVectorType type of selection vector
+ *
+ * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
+ */
+ public static Projector make(Schema schema, List<ExpressionTree> exprs,
+ SelectionVectorType selectionVectorType)
+ throws GandivaException {
+ return make(schema, exprs, selectionVectorType, JniLoader.getDefaultConfiguration());
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the list of project expressions.
+ * Invoke Projector::Evaluate() against a RecordBatch to evaluate the record batch
+ * against these projections.
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used
+ * to create the TreeNodes
+ * @param exprs List of expressions to be evaluated against data
+ * @param selectionVectorType type of selection vector
+ * @param configOptions ConfigOptions parameter
+ *
+ * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
+ */
+ public static Projector make(Schema schema, List<ExpressionTree> exprs, SelectionVectorType selectionVectorType,
+ ConfigurationBuilder.ConfigOptions configOptions) throws GandivaException {
+ return make(schema, exprs, selectionVectorType, JniLoader.getConfiguration(configOptions));
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the list of project expressions.
+ * Invoke Projector::Evaluate() against a RecordBatch to evaluate the record batch
+ * against these projections.
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used
+ * to create the TreeNodes
+ * @param exprs List of expressions to be evaluated against data
+ * @param selectionVectorType type of selection vector
+ * @param optimize Flag to choose if the generated llvm code is to be optimized
+ *
+ * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
+ */
+ @Deprecated
+ public static Projector make(Schema schema, List<ExpressionTree> exprs,
+ SelectionVectorType selectionVectorType, boolean optimize)
+ throws GandivaException {
+ return make(schema, exprs, selectionVectorType,
+ JniLoader.getConfiguration((new ConfigurationBuilder.ConfigOptions()).withOptimize(optimize)));
+ }
+
+ /**
+ * Invoke this function to generate LLVM code to evaluate the list of project expressions.
+ * Invoke Projector::Evaluate() against a RecordBatch to evaluate the record batch
+ * against these projections.
+ *
+ * @param schema Table schema. The field names in the schema should match the fields used
+ * to create the TreeNodes
+ * @param exprs List of expressions to be evaluated against data
+ * @param selectionVectorType type of selection vector
+ * @param configurationId Custom configuration created through config builder.
+ *
+ * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
+ */
+ public static Projector make(Schema schema, List<ExpressionTree> exprs,
+ SelectionVectorType selectionVectorType,
+ long configurationId) throws GandivaException {
+ // serialize the schema and the list of expressions as a protobuf
+ GandivaTypes.ExpressionList.Builder builder = GandivaTypes.ExpressionList.newBuilder();
+ for (ExpressionTree expr : exprs) {
+ builder.addExprs(expr.toProtobuf());
+ }
+
+ // Invoke the JNI layer to create the LLVM module representing the expressions
+ GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema);
+ JniWrapper wrapper = JniLoader.getInstance().getWrapper();
+ long moduleId = wrapper.buildProjector(schemaBuf.toByteArray(),
+ builder.build().toByteArray(), selectionVectorType.getNumber(), configurationId);
+ logger.debug("Created module for the projector with id {}", moduleId);
+ return new Projector(wrapper, moduleId, schema, exprs.size());
+ }
+
+ /**
+ * Invoke this function to evaluate a set of expressions against a recordBatch.
+ *
+ * @param recordBatch Record batch including the data
+ * @param outColumns Result of applying the project on the data
+ */
+ public void evaluate(ArrowRecordBatch recordBatch, List<ValueVector> outColumns)
+ throws GandivaException {
+ evaluate(recordBatch.getLength(), recordBatch.getBuffers(),
+ recordBatch.getBuffersLayout(),
+ SelectionVectorType.SV_NONE.getNumber(), recordBatch.getLength(),
+ 0, 0, outColumns);
+ }
+
+ /**
+ * Invoke this function to evaluate a set of expressions against a set of arrow buffers.
+ * (this is an optimised version that skips taking references).
+ *
+ * @param numRows number of rows.
+ * @param buffers List of input arrow buffers
+ * @param outColumns Result of applying the project on the data
+ */
+ public void evaluate(int numRows, List<ArrowBuf> buffers,
+ List<ValueVector> outColumns) throws GandivaException {
+ List<ArrowBuffer> buffersLayout = new ArrayList<>();
+ long offset = 0;
+ for (ArrowBuf arrowBuf : buffers) {
+ long size = arrowBuf.readableBytes();
+ buffersLayout.add(new ArrowBuffer(offset, size));
+ offset += size;
+ }
+ evaluate(numRows, buffers, buffersLayout,
+ SelectionVectorType.SV_NONE.getNumber(),
+ numRows, 0, 0, outColumns);
+ }
+
+ /**
+ * Invoke this function to evaluate a set of expressions against a {@link ArrowRecordBatch}.
+ *
+ * @param recordBatch The data to evaluate against.
+ * @param selectionVector Selection vector which stores the selected rows.
+ * @param outColumns Result of applying the project on the data
+ */
+ public void evaluate(ArrowRecordBatch recordBatch,
+ SelectionVector selectionVector, List<ValueVector> outColumns)
+ throws GandivaException {
+ evaluate(recordBatch.getLength(), recordBatch.getBuffers(),
+ recordBatch.getBuffersLayout(),
+ selectionVector.getType().getNumber(),
+ selectionVector.getRecordCount(),
+ selectionVector.getBuffer().memoryAddress(),
+ selectionVector.getBuffer().capacity(),
+ outColumns);
+ }
+
+ /**
+ * Invoke this function to evaluate a set of expressions against a set of arrow buffers
+ * on the selected positions.
+ * (this is an optimised version that skips taking references).
+ *
+ * @param numRows number of rows.
+ * @param buffers List of input arrow buffers
+ * @param selectionVector Selection vector which stores the selected rows.
+ * @param outColumns Result of applying the project on the data
+ */
+ public void evaluate(int numRows, List<ArrowBuf> buffers,
+ SelectionVector selectionVector,
+ List<ValueVector> outColumns) throws GandivaException {
+ List<ArrowBuffer> buffersLayout = new ArrayList<>();
+ long offset = 0;
+ for (ArrowBuf arrowBuf : buffers) {
+ long size = arrowBuf.readableBytes();
+ buffersLayout.add(new ArrowBuffer(offset, size));
+ offset += size;
+ }
+ evaluate(numRows, buffers, buffersLayout,
+ selectionVector.getType().getNumber(),
+ selectionVector.getRecordCount(),
+ selectionVector.getBuffer().memoryAddress(),
+ selectionVector.getBuffer().capacity(),
+ outColumns);
+ }
+
+ private void evaluate(int numRows, List<ArrowBuf> buffers, List<ArrowBuffer> buffersLayout,
+ int selectionVectorType, int selectionVectorRecordCount,
+ long selectionVectorAddr, long selectionVectorSize,
+ List<ValueVector> outColumns) throws GandivaException {
+ if (this.closed) {
+ throw new EvaluatorClosedException();
+ }
+
+ if (numExprs != outColumns.size()) {
+ logger.info("Expected " + numExprs + " columns, got " + outColumns.size());
+ throw new GandivaException("Incorrect number of columns for the output vector");
+ }
+
+ long[] bufAddrs = new long[buffers.size()];
+ long[] bufSizes = new long[buffers.size()];
+
+ int idx = 0;
+ for (ArrowBuf buf : buffers) {
+ bufAddrs[idx++] = buf.memoryAddress();
+ }
+
+ idx = 0;
+ for (ArrowBuffer bufLayout : buffersLayout) {
+ bufSizes[idx++] = bufLayout.getSize();
+ }
+
+ boolean hasVariableWidthColumns = false;
+ BaseVariableWidthVector[] resizableVectors = new BaseVariableWidthVector[outColumns.size()];
+ long[] outAddrs = new long[3 * outColumns.size()];
+ long[] outSizes = new long[3 * outColumns.size()];
+ idx = 0;
+ int outColumnIdx = 0;
+ for (ValueVector valueVector : outColumns) {
+ boolean isFixedWith = valueVector instanceof FixedWidthVector;
+ boolean isVarWidth = valueVector instanceof VariableWidthVector;
+ if (!isFixedWith && !isVarWidth) {
+ throw new UnsupportedTypeException(
+ "Unsupported value vector type " + valueVector.getField().getFieldType());
+ }
+
+ outAddrs[idx] = valueVector.getValidityBuffer().memoryAddress();
+ outSizes[idx++] = valueVector.getValidityBuffer().capacity();
+ if (isVarWidth) {
+ outAddrs[idx] = valueVector.getOffsetBuffer().memoryAddress();
+ outSizes[idx++] = valueVector.getOffsetBuffer().capacity();
+ hasVariableWidthColumns = true;
+
+ // save vector to allow for resizing.
+ resizableVectors[outColumnIdx] = (BaseVariableWidthVector) valueVector;
+ }
+ outAddrs[idx] = valueVector.getDataBuffer().memoryAddress();
+ outSizes[idx++] = valueVector.getDataBuffer().capacity();
+
+ valueVector.setValueCount(selectionVectorRecordCount);
+ outColumnIdx++;
+ }
+
+ wrapper.evaluateProjector(
+ hasVariableWidthColumns ? new VectorExpander(resizableVectors) : null,
+ this.moduleId, numRows, bufAddrs, bufSizes,
+ selectionVectorType, selectionVectorRecordCount,
+ selectionVectorAddr, selectionVectorSize,
+ outAddrs, outSizes);
+ }
+
+ /**
+ * Closes the LLVM module representing this evaluator.
+ */
+ public void close() throws GandivaException {
+ if (this.closed) {
+ return;
+ }
+
+ wrapper.closeProjector(this.moduleId);
+ this.closed = true;
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVector.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVector.java
new file mode 100644
index 000000000..2af88b526
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVector.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType;
+import org.apache.arrow.memory.ArrowBuf;
+
+/**
+ * A selection vector contains the indexes of "selected" records in a row batch. It is backed by an
+ * arrow buffer.
+ * Client manages the lifecycle of the arrow buffer - to release the reference.
+ */
+public abstract class SelectionVector {
+ private int recordCount;
+ private ArrowBuf buffer;
+
+ public SelectionVector(ArrowBuf buffer) {
+ this.buffer = buffer;
+ }
+
+ public final ArrowBuf getBuffer() {
+ return this.buffer;
+ }
+
+ /*
+ * The maximum number of records that the selection vector can hold.
+ */
+ public final int getMaxRecords() {
+ return capAtMaxInt(buffer.capacity() / getRecordSize());
+ }
+
+ /*
+ * The number of records held by the selection vector.
+ */
+ public final int getRecordCount() {
+ return this.recordCount;
+ }
+
+ /*
+ * Set the number of records in the selection vector.
+ */
+ final void setRecordCount(int recordCount) {
+ if (recordCount * getRecordSize() > buffer.capacity()) {
+ throw new IllegalArgumentException("recordCount " + recordCount +
+ " of size " + getRecordSize() +
+ " exceeds buffer capacity " + buffer.capacity());
+ }
+
+ this.recordCount = recordCount;
+ }
+
+ /*
+ * Get the value at specified index.
+ */
+ public abstract int getIndex(int index);
+
+ /*
+ * Get the record size of the selection vector itself.
+ */
+ abstract int getRecordSize();
+
+ abstract SelectionVectorType getType();
+
+ final void checkReadBounds(int index) {
+ if (index >= this.recordCount) {
+ throw new IllegalArgumentException("index " + index + " is >= recordCount " + recordCount);
+ }
+ }
+
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt16.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt16.java
new file mode 100644
index 000000000..84c795b67
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt16.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType;
+import org.apache.arrow.memory.ArrowBuf;
+
+/**
+ * Selection vector with records of arrow type INT16.
+ */
+public class SelectionVectorInt16 extends SelectionVector {
+
+ public SelectionVectorInt16(ArrowBuf buffer) {
+ super(buffer);
+ }
+
+ @Override
+ public int getRecordSize() {
+ return 2;
+ }
+
+ @Override
+ public SelectionVectorType getType() {
+ return SelectionVectorType.SV_INT16;
+ }
+
+ @Override
+ public int getIndex(int index) {
+ checkReadBounds(index);
+
+ char value = getBuffer().getChar(index * getRecordSize());
+ return (int) value;
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt32.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt32.java
new file mode 100644
index 000000000..c938f6691
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt32.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType;
+import org.apache.arrow.memory.ArrowBuf;
+
+/**
+ * Selection vector with records of arrow type INT32.
+ */
+public class SelectionVectorInt32 extends SelectionVector {
+
+ public SelectionVectorInt32(ArrowBuf buffer) {
+ super(buffer);
+ }
+
+ @Override
+ public int getRecordSize() {
+ return 4;
+ }
+
+ @Override
+ public SelectionVectorType getType() {
+ return SelectionVectorType.SV_INT32;
+ }
+
+ @Override
+ public int getIndex(int index) {
+ checkReadBounds(index);
+
+ return getBuffer().getInt(index * getRecordSize());
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java
new file mode 100644
index 000000000..f22ebbd37
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import org.apache.arrow.vector.BaseVariableWidthVector;
+
+/**
+ * This class provides the functionality to expand output vectors using a callback mechanism from
+ * gandiva.
+ */
+public class VectorExpander {
+ private final BaseVariableWidthVector[] vectors;
+
+ public VectorExpander(BaseVariableWidthVector[] vectors) {
+ this.vectors = vectors;
+ }
+
+ /**
+ * Result of vector expansion.
+ */
+ public static class ExpandResult {
+ public long address;
+ public long capacity;
+
+ public ExpandResult(long address, long capacity) {
+ this.address = address;
+ this.capacity = capacity;
+ }
+ }
+
+ /**
+ * Expand vector at specified index. This is used as a back call from jni, and is only
+ * relevant for variable width vectors.
+ *
+ * @param index index of buffer in the list passed to jni.
+ * @param toCapacity the size to which the buffer should be expanded to.
+ *
+ * @return address and size of the buffer after expansion.
+ */
+ public ExpandResult expandOutputVectorAtIndex(int index, long toCapacity) {
+ if (index >= vectors.length || vectors[index] == null) {
+ throw new IllegalArgumentException("invalid index " + index);
+ }
+
+ BaseVariableWidthVector vector = vectors[index];
+ while (vector.getDataBuffer().capacity() < toCapacity) {
+ vector.reallocDataBuffer();
+ }
+ return new ExpandResult(
+ vector.getDataBuffer().memoryAddress(),
+ vector.getDataBuffer().capacity());
+ }
+
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/EvaluatorClosedException.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/EvaluatorClosedException.java
new file mode 100644
index 000000000..d3fb8b60d
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/EvaluatorClosedException.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.exceptions;
+
+/** Indicates an attempted call to methods on a closed evaluator. */
+public class EvaluatorClosedException extends GandivaException {
+ public EvaluatorClosedException() {
+ super("Cannot invoke methods on evaluator after closing it");
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java
new file mode 100644
index 000000000..e7fce58a3
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.exceptions;
+
+/** Base class for all specialized exceptions this package uses. */
+public class GandivaException extends Exception {
+
+ public GandivaException(String msg) {
+ super(msg);
+ }
+
+ public GandivaException(String msg, Exception cause) {
+ super(msg, cause);
+ }
+
+ @Override
+ public String toString() {
+ return getMessage();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/UnsupportedTypeException.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/UnsupportedTypeException.java
new file mode 100644
index 000000000..90e06e80e
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/UnsupportedTypeException.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.exceptions;
+
+/**
+ * Represents an exception thrown while dealing with unsupported types.
+ */
+public class UnsupportedTypeException extends GandivaException {
+ public UnsupportedTypeException(String msg) {
+ super(msg);
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/AndNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/AndNode.java
new file mode 100644
index 000000000..ecc577fa7
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/AndNode.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import java.util.List;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Node representing a logical And expression.
+ */
+class AndNode implements TreeNode {
+ private final List<TreeNode> children;
+
+ AndNode(List<TreeNode> children) {
+ this.children = children;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.AndNode.Builder andNode = GandivaTypes.AndNode.newBuilder();
+
+ for (TreeNode arg : children) {
+ andNode.addArgs(arg.toProtobuf());
+ }
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setAndNode(andNode.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java
new file mode 100644
index 000000000..90f8684b4
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.flatbuf.DateUnit;
+import org.apache.arrow.flatbuf.IntervalUnit;
+import org.apache.arrow.flatbuf.TimeUnit;
+import org.apache.arrow.flatbuf.Type;
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.exceptions.UnsupportedTypeException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Utility methods to convert between Arrow and Gandiva types.
+ */
+public class ArrowTypeHelper {
+ private ArrowTypeHelper() {}
+
+ static final int WIDTH_8 = 8;
+ static final int WIDTH_16 = 16;
+ static final int WIDTH_32 = 32;
+ static final int WIDTH_64 = 64;
+
+ private static void initArrowTypeInt(
+ ArrowType.Int intType, GandivaTypes.ExtGandivaType.Builder builder) throws GandivaException {
+ int width = intType.getBitWidth();
+
+ if (intType.getIsSigned()) {
+ switch (width) {
+ case WIDTH_8: {
+ builder.setType(GandivaTypes.GandivaType.INT8);
+ return;
+ }
+ case WIDTH_16: {
+ builder.setType(GandivaTypes.GandivaType.INT16);
+ return;
+ }
+ case WIDTH_32: {
+ builder.setType(GandivaTypes.GandivaType.INT32);
+ return;
+ }
+ case WIDTH_64: {
+ builder.setType(GandivaTypes.GandivaType.INT64);
+ return;
+ }
+ default: {
+ throw new UnsupportedTypeException("Unsupported width for integer type");
+ }
+ }
+ }
+
+ // unsigned int
+ switch (width) {
+ case WIDTH_8: {
+ builder.setType(GandivaTypes.GandivaType.UINT8);
+ return;
+ }
+ case WIDTH_16: {
+ builder.setType(GandivaTypes.GandivaType.UINT16);
+ return;
+ }
+ case WIDTH_32: {
+ builder.setType(GandivaTypes.GandivaType.UINT32);
+ return;
+ }
+ case WIDTH_64: {
+ builder.setType(GandivaTypes.GandivaType.UINT64);
+ return;
+ }
+ default: {
+ throw new UnsupportedTypeException("Unsupported width for integer type");
+ }
+ }
+ }
+
+ private static void initArrowTypeFloat(
+ ArrowType.FloatingPoint floatType, GandivaTypes.ExtGandivaType.Builder builder)
+ throws GandivaException {
+ switch (floatType.getPrecision()) {
+ case HALF: {
+ builder.setType(GandivaTypes.GandivaType.HALF_FLOAT);
+ break;
+ }
+ case SINGLE: {
+ builder.setType(GandivaTypes.GandivaType.FLOAT);
+ break;
+ }
+ case DOUBLE: {
+ builder.setType(GandivaTypes.GandivaType.DOUBLE);
+ break;
+ }
+ default: {
+ throw new UnsupportedTypeException("Floating point type with unknown precision");
+ }
+ }
+ }
+
+ private static void initArrowTypeDecimal(ArrowType.Decimal decimalType,
+ GandivaTypes.ExtGandivaType.Builder builder) {
+ Preconditions.checkArgument(decimalType.getPrecision() > 0 &&
+ decimalType.getPrecision() <= 38, "Gandiva only supports decimals of upto 38 " +
+ "precision. Input precision : " + decimalType.getPrecision());
+ builder.setPrecision(decimalType.getPrecision());
+ builder.setScale(decimalType.getScale());
+ builder.setType(GandivaTypes.GandivaType.DECIMAL);
+ }
+
+ private static void initArrowTypeDate(ArrowType.Date dateType,
+ GandivaTypes.ExtGandivaType.Builder builder) {
+ short dateUnit = dateType.getUnit().getFlatbufID();
+ switch (dateUnit) {
+ case DateUnit.DAY: {
+ builder.setType(GandivaTypes.GandivaType.DATE32);
+ break;
+ }
+ case DateUnit.MILLISECOND: {
+ builder.setType(GandivaTypes.GandivaType.DATE64);
+ break;
+ }
+ default: {
+ // not supported
+ break;
+ }
+ }
+ }
+
+ private static void initArrowTypeTime(ArrowType.Time timeType,
+ GandivaTypes.ExtGandivaType.Builder builder) {
+ short timeUnit = timeType.getUnit().getFlatbufID();
+ switch (timeUnit) {
+ case TimeUnit.SECOND: {
+ builder.setType(GandivaTypes.GandivaType.TIME32);
+ builder.setTimeUnit(GandivaTypes.TimeUnit.SEC);
+ break;
+ }
+ case TimeUnit.MILLISECOND: {
+ builder.setType(GandivaTypes.GandivaType.TIME32);
+ builder.setTimeUnit(GandivaTypes.TimeUnit.MILLISEC);
+ break;
+ }
+ case TimeUnit.MICROSECOND: {
+ builder.setType(GandivaTypes.GandivaType.TIME64);
+ builder.setTimeUnit(GandivaTypes.TimeUnit.MICROSEC);
+ break;
+ }
+ case TimeUnit.NANOSECOND: {
+ builder.setType(GandivaTypes.GandivaType.TIME64);
+ builder.setTimeUnit(GandivaTypes.TimeUnit.NANOSEC);
+ break;
+ }
+ default: {
+ // not supported
+ }
+ }
+ }
+
+ private static void initArrowTypeTimestamp(ArrowType.Timestamp timestampType,
+ GandivaTypes.ExtGandivaType.Builder builder) {
+ short timeUnit = timestampType.getUnit().getFlatbufID();
+ switch (timeUnit) {
+ case TimeUnit.SECOND: {
+ builder.setType(GandivaTypes.GandivaType.TIMESTAMP);
+ builder.setTimeUnit(GandivaTypes.TimeUnit.SEC);
+ break;
+ }
+ case TimeUnit.MILLISECOND: {
+ builder.setType(GandivaTypes.GandivaType.TIMESTAMP);
+ builder.setTimeUnit(GandivaTypes.TimeUnit.MILLISEC);
+ break;
+ }
+ case TimeUnit.MICROSECOND: {
+ builder.setType(GandivaTypes.GandivaType.TIMESTAMP);
+ builder.setTimeUnit(GandivaTypes.TimeUnit.MICROSEC);
+ break;
+ }
+ case TimeUnit.NANOSECOND: {
+ builder.setType(GandivaTypes.GandivaType.TIMESTAMP);
+ builder.setTimeUnit(GandivaTypes.TimeUnit.NANOSEC);
+ break;
+ }
+ default: {
+ // not supported
+ }
+ }
+ }
+
+ private static void initArrowTypeInterval(ArrowType.Interval interval,
+ GandivaTypes.ExtGandivaType.Builder builder) {
+ short intervalUnit = interval.getUnit().getFlatbufID();
+ switch (intervalUnit) {
+ case IntervalUnit.YEAR_MONTH: {
+ builder.setType(GandivaTypes.GandivaType.INTERVAL);
+ builder.setIntervalType(GandivaTypes.IntervalType.YEAR_MONTH);
+ break;
+ }
+ case IntervalUnit.DAY_TIME: {
+ builder.setType(GandivaTypes.GandivaType.INTERVAL);
+ builder.setIntervalType(GandivaTypes.IntervalType.DAY_TIME);
+ break;
+ }
+ default: {
+ // not supported
+ }
+ }
+ }
+
+ /**
+ * Converts an arrow type into a protobuf.
+ *
+ * @param arrowType Arrow type to be converted
+ * @return Protobuf representing the arrow type
+ */
+ public static GandivaTypes.ExtGandivaType arrowTypeToProtobuf(ArrowType arrowType)
+ throws GandivaException {
+ GandivaTypes.ExtGandivaType.Builder builder = GandivaTypes.ExtGandivaType.newBuilder();
+
+ byte typeId = arrowType.getTypeID().getFlatbufID();
+ switch (typeId) {
+ case Type.NONE: { // 0
+ builder.setType(GandivaTypes.GandivaType.NONE);
+ break;
+ }
+ case Type.Null: { // 1
+ // TODO: Need to handle this later
+ break;
+ }
+ case Type.Int: { // 2
+ ArrowTypeHelper.initArrowTypeInt((ArrowType.Int) arrowType, builder);
+ break;
+ }
+ case Type.FloatingPoint: { // 3
+ ArrowTypeHelper.initArrowTypeFloat((ArrowType.FloatingPoint) arrowType, builder);
+ break;
+ }
+ case Type.Binary: { // 4
+ builder.setType(GandivaTypes.GandivaType.BINARY);
+ break;
+ }
+ case Type.Utf8: { // 5
+ builder.setType(GandivaTypes.GandivaType.UTF8);
+ break;
+ }
+ case Type.Bool: { // 6
+ builder.setType(GandivaTypes.GandivaType.BOOL);
+ break;
+ }
+ case Type.Decimal: { // 7
+ ArrowTypeHelper.initArrowTypeDecimal((ArrowType.Decimal) arrowType, builder);
+ break;
+ }
+ case Type.Date: { // 8
+ ArrowTypeHelper.initArrowTypeDate((ArrowType.Date) arrowType, builder);
+ break;
+ }
+ case Type.Time: { // 9
+ ArrowTypeHelper.initArrowTypeTime((ArrowType.Time) arrowType, builder);
+ break;
+ }
+ case Type.Timestamp: { // 10
+ ArrowTypeHelper.initArrowTypeTimestamp((ArrowType.Timestamp) arrowType, builder);
+ break;
+ }
+ case Type.Interval: { // 11
+ ArrowTypeHelper.initArrowTypeInterval((ArrowType.Interval) arrowType, builder);
+ break;
+ }
+ case Type.List: { // 12
+ break;
+ }
+ case Type.Struct_: { // 13
+ break;
+ }
+ case Type.Union: { // 14
+ break;
+ }
+ case Type.FixedSizeBinary: { // 15
+ break;
+ }
+ case Type.FixedSizeList: { // 16
+ break;
+ }
+ case Type.Map: { // 17
+ break;
+ }
+ default: {
+ break;
+ }
+ }
+
+ if (!builder.hasType()) {
+ // type has not been set
+ // throw an exception
+ throw new UnsupportedTypeException("Unsupported type " + arrowType.toString());
+ }
+
+ return builder.build();
+ }
+
+ /**
+ * Converts an arrow field object to a protobuf.
+ * @param field Arrow field to be converted
+ * @return Protobuf representing the arrow field
+ */
+ public static GandivaTypes.Field arrowFieldToProtobuf(Field field) throws GandivaException {
+ GandivaTypes.Field.Builder builder = GandivaTypes.Field.newBuilder();
+ builder.setName(field.getName());
+ builder.setType(ArrowTypeHelper.arrowTypeToProtobuf(field.getType()));
+ builder.setNullable(field.isNullable());
+
+ for (Field child : field.getChildren()) {
+ builder.addChildren(ArrowTypeHelper.arrowFieldToProtobuf(child));
+ }
+
+ return builder.build();
+ }
+
+ /**
+ * Converts a schema object to a protobuf.
+ * @param schema Schema object to be converted
+ * @return Protobuf representing a schema object
+ */
+ public static GandivaTypes.Schema arrowSchemaToProtobuf(Schema schema) throws GandivaException {
+ GandivaTypes.Schema.Builder builder = GandivaTypes.Schema.newBuilder();
+
+ for (Field field : schema.getFields()) {
+ builder.addColumns(ArrowTypeHelper.arrowFieldToProtobuf(field));
+ }
+
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BinaryNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BinaryNode.java
new file mode 100644
index 000000000..8455f29b2
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BinaryNode.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * Used to represent expression tree nodes representing binary constants.
+ */
+class BinaryNode implements TreeNode {
+ private final byte[] value;
+
+ public BinaryNode(byte[] value) {
+ this.value = value;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.BinaryNode binaryNode = GandivaTypes.BinaryNode.newBuilder()
+ .setValue(ByteString.copyFrom(value))
+ .build();
+
+ return GandivaTypes.TreeNode.newBuilder()
+ .setBinaryNode(binaryNode)
+ .build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BooleanNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BooleanNode.java
new file mode 100644
index 000000000..505f01919
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BooleanNode.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Used to represent expression tree nodes representing boolean constants.
+ * Used while creating expressions like if (!x).
+ */
+class BooleanNode implements TreeNode {
+ private final Boolean value;
+
+ BooleanNode(Boolean value) {
+ this.value = value;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.BooleanNode.Builder boolBuilder = GandivaTypes.BooleanNode.newBuilder();
+ boolBuilder.setValue(value.booleanValue());
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setBooleanNode(boolBuilder.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/Condition.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/Condition.java
new file mode 100644
index 000000000..1d584d673
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/Condition.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Opaque class representing a filter condition.
+ */
+public class Condition {
+ private final TreeNode root;
+
+ Condition(TreeNode root) {
+ this.root = root;
+ }
+
+ /**
+ * Converts an condition expression into a protobuf.
+ * @return A protobuf representing the condition expression tree
+ */
+ public GandivaTypes.Condition toProtobuf() throws GandivaException {
+ GandivaTypes.Condition.Builder builder = GandivaTypes.Condition.newBuilder();
+ builder.setRoot(root.toProtobuf());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java
new file mode 100644
index 000000000..bf17aa0aa
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Used to represent expression tree nodes representing decimal constants.
+ * Used in the expression (x + 5.0)
+ */
+class DecimalNode implements TreeNode {
+ private final String value;
+ private final int precision;
+ private final int scale;
+
+ DecimalNode(String value, int precision, int scale) {
+ this.value = value;
+ this.precision = precision;
+ this.scale = scale;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.DecimalNode.Builder decimalNode = GandivaTypes.DecimalNode.newBuilder();
+ decimalNode.setValue(value);
+ decimalNode.setPrecision(precision);
+ decimalNode.setScale(scale);
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setDecimalNode(decimalNode.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DoubleNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DoubleNode.java
new file mode 100644
index 000000000..f7a9436f1
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DoubleNode.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Used to represent expression tree nodes representing double constants.
+ * Used in the expression (x + 5.0)
+ */
+class DoubleNode implements TreeNode {
+ private final Double value;
+
+ DoubleNode(Double value) {
+ this.value = value;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.DoubleNode.Builder doubleBuilder = GandivaTypes.DoubleNode.newBuilder();
+ doubleBuilder.setValue(value.doubleValue());
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setDoubleNode(doubleBuilder.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ExpressionTree.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ExpressionTree.java
new file mode 100644
index 000000000..353c8d12b
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ExpressionTree.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Opaque class representing an expression.
+ */
+public class ExpressionTree {
+ private final TreeNode root;
+ private final Field resultField;
+
+ ExpressionTree(TreeNode root, Field resultField) {
+ this.root = root;
+ this.resultField = resultField;
+ }
+
+ /**
+ * Converts an expression tree into a protobuf.
+ * @return A protobuf representing the expression tree
+ */
+ public GandivaTypes.ExpressionRoot toProtobuf() throws GandivaException {
+ GandivaTypes.ExpressionRoot.Builder builder = GandivaTypes.ExpressionRoot.newBuilder();
+ builder.setRoot(root.toProtobuf());
+ builder.setResultType(ArrowTypeHelper.arrowFieldToProtobuf(resultField));
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FieldNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FieldNode.java
new file mode 100644
index 000000000..893bf7191
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FieldNode.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Opaque class that represents a tree node.
+ */
+class FieldNode implements TreeNode {
+ private final Field field;
+
+ FieldNode(Field field) {
+ this.field = field;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.FieldNode.Builder fieldNode = GandivaTypes.FieldNode.newBuilder();
+ fieldNode.setField(ArrowTypeHelper.arrowFieldToProtobuf(field));
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setFieldNode(fieldNode.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FloatNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FloatNode.java
new file mode 100644
index 000000000..6afe96bfe
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FloatNode.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Used to represent expression tree nodes representing float constants.
+ * Used in the expression (x + 5.0)
+ */
+class FloatNode implements TreeNode {
+ private final Float value;
+
+ public FloatNode(Float value) {
+ this.value = value;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.FloatNode.Builder floatBuilder = GandivaTypes.FloatNode.newBuilder();
+ floatBuilder.setValue(value.floatValue());
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setFloatNode(floatBuilder.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java
new file mode 100644
index 000000000..ead1e146d
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import java.util.List;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+/**
+ * Node representing an arbitrary function in an expression.
+ */
+class FunctionNode implements TreeNode {
+ private final String function;
+ private final List<TreeNode> children;
+ private final ArrowType retType;
+
+ FunctionNode(String function, List<TreeNode> children, ArrowType retType) {
+ this.function = function;
+ this.children = children;
+ this.retType = retType;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.FunctionNode.Builder fnNode = GandivaTypes.FunctionNode.newBuilder();
+ fnNode.setFunctionName(function);
+ fnNode.setReturnType(ArrowTypeHelper.arrowTypeToProtobuf(retType));
+
+ for (TreeNode arg : children) {
+ fnNode.addInArgs(arg.toProtobuf());
+ }
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setFnNode(fnNode.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IfNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IfNode.java
new file mode 100644
index 000000000..19f9095fb
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IfNode.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+/**
+ * Node representing a if-then-else block expression.
+ */
+class IfNode implements TreeNode {
+ private final TreeNode condition;
+ private final TreeNode thenNode;
+ private final TreeNode elseNode;
+ private final ArrowType retType;
+
+ IfNode(TreeNode condition, TreeNode thenNode, TreeNode elseNode, ArrowType retType) {
+ this.condition = condition;
+ this.thenNode = thenNode;
+ this.elseNode = elseNode;
+ this.retType = retType;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.IfNode.Builder ifNodeBuilder = GandivaTypes.IfNode.newBuilder();
+ ifNodeBuilder.setCond(condition.toProtobuf());
+ ifNodeBuilder.setThenNode(thenNode.toProtobuf());
+ ifNodeBuilder.setElseNode(elseNode.toProtobuf());
+ ifNodeBuilder.setReturnType(ArrowTypeHelper.arrowTypeToProtobuf(retType));
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setIfNode(ifNodeBuilder.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
new file mode 100644
index 000000000..0f8de9628
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import java.math.BigDecimal;
+import java.nio.charset.Charset;
+import java.util.Set;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * In Node representation in java.
+ */
+public class InNode implements TreeNode {
+ private static final Charset charset = Charset.forName("UTF-8");
+
+ private final Set<Integer> intValues;
+ private final Set<Long> longValues;
+ private final Set<Float> floatValues;
+ private final Set<Double> doubleValues;
+ private final Set<BigDecimal> decimalValues;
+ private final Set<String> stringValues;
+ private final Set<byte[]> binaryValues;
+ private final TreeNode input;
+
+ private final Integer precision;
+ private final Integer scale;
+
+ private InNode(Set<Integer> values, Set<Long> longValues, Set<String> stringValues, Set<byte[]>
+ binaryValues, Set<BigDecimal> decimalValues, Integer precision, Integer scale,
+ Set<Float> floatValues, Set<Double> doubleValues, TreeNode node) {
+ this.intValues = values;
+ this.longValues = longValues;
+ this.decimalValues = decimalValues;
+ this.precision = precision;
+ this.scale = scale;
+ this.stringValues = stringValues;
+ this.binaryValues = binaryValues;
+ this.floatValues = floatValues;
+ this.doubleValues = doubleValues;
+ this.input = node;
+ }
+
+ /**
+ * Makes an IN node for int values.
+ *
+ * @param node Node with the 'IN' clause.
+ * @param intValues Int values to build the IN node.
+ * @retur InNode referring to tree node.
+ */
+ public static InNode makeIntInExpr(TreeNode node, Set<Integer> intValues) {
+ return new InNode(intValues,
+ null, null, null, null, null, null, null,
+ null, node);
+ }
+
+ /**
+ * Makes an IN node for long values.
+ *
+ * @param node Node with the 'IN' clause.
+ * @param longValues Long values to build the IN node.
+ * @retur InNode referring to tree node.
+ */
+ public static InNode makeLongInExpr(TreeNode node, Set<Long> longValues) {
+ return new InNode(null, longValues,
+ null, null, null, null, null, null,
+ null, node);
+ }
+
+ /**
+ * Makes an IN node for float values.
+ *
+ * @param node Node with the 'IN' clause.
+ * @param floatValues Float values to build the IN node.
+ * @retur InNode referring to tree node.
+ */
+ public static InNode makeFloatInExpr(TreeNode node, Set<Float> floatValues) {
+ return new InNode(null, null, null, null, null, null,
+ null, floatValues, null, node);
+ }
+
+ /**
+ * Makes an IN node for double values.
+ *
+ * @param node Node with the 'IN' clause.
+ * @param doubleValues Double values to build the IN node.
+ * @retur InNode referring to tree node.
+ */
+ public static InNode makeDoubleInExpr(TreeNode node, Set<Double> doubleValues) {
+ return new InNode(null, null, null, null, null,
+ null, null, null, doubleValues, node);
+ }
+
+ public static InNode makeDecimalInExpr(TreeNode node, Set<BigDecimal> decimalValues,
+ Integer precision, Integer scale) {
+ return new InNode(null, null, null, null,
+ decimalValues, precision, scale, null, null, node);
+ }
+
+ public static InNode makeStringInExpr(TreeNode node, Set<String> stringValues) {
+ return new InNode(null, null, stringValues, null,
+ null, null, null, null, null, node);
+ }
+
+ public static InNode makeBinaryInExpr(TreeNode node, Set<byte[]> binaryValues) {
+ return new InNode(null, null, null, binaryValues,
+ null, null, null, null, null, node);
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.InNode.Builder inNode = GandivaTypes.InNode.newBuilder();
+
+ inNode.setNode(input.toProtobuf());
+
+ if (intValues != null) {
+ GandivaTypes.IntConstants.Builder intConstants = GandivaTypes.IntConstants.newBuilder();
+ intValues.stream().forEach(val -> intConstants.addIntValues(GandivaTypes.IntNode.newBuilder()
+ .setValue(val).build()));
+ inNode.setIntValues(intConstants.build());
+ } else if (longValues != null) {
+ GandivaTypes.LongConstants.Builder longConstants = GandivaTypes.LongConstants.newBuilder();
+ longValues.stream().forEach(val -> longConstants.addLongValues(GandivaTypes.LongNode.newBuilder()
+ .setValue(val).build()));
+ inNode.setLongValues(longConstants.build());
+ } else if (floatValues != null) {
+ GandivaTypes.FloatConstants.Builder floatConstants = GandivaTypes.FloatConstants.newBuilder();
+ floatValues.stream().forEach(val -> floatConstants.addFloatValues(GandivaTypes.FloatNode.newBuilder()
+ .setValue(val).build()));
+ inNode.setFloatValues(floatConstants.build());
+ } else if (doubleValues != null) {
+ GandivaTypes.DoubleConstants.Builder doubleConstants = GandivaTypes.DoubleConstants.newBuilder();
+ doubleValues.stream().forEach(val -> doubleConstants.addDoubleValues(GandivaTypes.DoubleNode.newBuilder()
+ .setValue(val).build()));
+ inNode.setDoubleValues(doubleConstants.build());
+ } else if (decimalValues != null) {
+ GandivaTypes.DecimalConstants.Builder decimalConstants = GandivaTypes.DecimalConstants.newBuilder();
+ decimalValues.stream().forEach(val -> decimalConstants.addDecimalValues(GandivaTypes.DecimalNode.newBuilder()
+ .setValue(val.toPlainString()).setPrecision(precision).setScale(scale).build()));
+ inNode.setDecimalValues(decimalConstants.build());
+ } else if (stringValues != null) {
+ GandivaTypes.StringConstants.Builder stringConstants = GandivaTypes.StringConstants
+ .newBuilder();
+ stringValues.stream().forEach(val -> stringConstants.addStringValues(GandivaTypes.StringNode
+ .newBuilder().setValue(ByteString.copyFrom(val.getBytes(charset))).build()));
+ inNode.setStringValues(stringConstants.build());
+ } else if (binaryValues != null) {
+ GandivaTypes.BinaryConstants.Builder binaryConstants = GandivaTypes.BinaryConstants
+ .newBuilder();
+ binaryValues.stream().forEach(val -> binaryConstants.addBinaryValues(GandivaTypes.BinaryNode
+ .newBuilder().setValue(ByteString.copyFrom(val)).build()));
+ inNode.setBinaryValues(binaryConstants.build());
+ }
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setInNode(inNode.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IntNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IntNode.java
new file mode 100644
index 000000000..c3858ef7e
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IntNode.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Used to represent expression tree nodes representing int constants.
+ * Used in the expression (x + 5)
+ */
+class IntNode implements TreeNode {
+ private final Integer value;
+
+ IntNode(Integer value) {
+ this.value = value;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.IntNode.Builder intBuilder = GandivaTypes.IntNode.newBuilder();
+ intBuilder.setValue(value.intValue());
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setIntNode(intBuilder.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/LongNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/LongNode.java
new file mode 100644
index 000000000..311c5d94d
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/LongNode.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Used to represent expression tree nodes representing long constants.
+ * Used in the expression (x + 5L)
+ */
+class LongNode implements TreeNode {
+ private final Long value;
+
+ LongNode(Long value) {
+ this.value = value;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.LongNode.Builder longBuilder = GandivaTypes.LongNode.newBuilder();
+ longBuilder.setValue(value.longValue());
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setLongNode(longBuilder.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/NullNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/NullNode.java
new file mode 100644
index 000000000..a8e7d6f82
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/NullNode.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+/** An expression indicating a null value. */
+class NullNode implements TreeNode {
+ private final ArrowType type;
+
+ NullNode(ArrowType type) {
+ this.type = type;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.NullNode.Builder nullNode = GandivaTypes.NullNode.newBuilder();
+ nullNode.setType(ArrowTypeHelper.arrowTypeToProtobuf(type));
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setNullNode(nullNode.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/OrNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/OrNode.java
new file mode 100644
index 000000000..2dbdfed7c
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/OrNode.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import java.util.List;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Represents a logical OR Node.
+ */
+class OrNode implements TreeNode {
+ private final List<TreeNode> children;
+
+ OrNode(List<TreeNode> children) {
+ this.children = children;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.OrNode.Builder orNode = GandivaTypes.OrNode.newBuilder();
+
+ for (TreeNode arg : children) {
+ orNode.addArgs(arg.toProtobuf());
+ }
+
+ GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder();
+ builder.setOrNode(orNode.build());
+ return builder.build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/StringNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/StringNode.java
new file mode 100644
index 000000000..a44329739
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/StringNode.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import java.nio.charset.Charset;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * Used to represent expression tree nodes representing utf8 constants.
+ */
+class StringNode implements TreeNode {
+ private static final Charset charset = Charset.forName("UTF-8");
+ private final String value;
+
+ public StringNode(String value) {
+ this.value = value;
+ }
+
+ @Override
+ public GandivaTypes.TreeNode toProtobuf() throws GandivaException {
+ GandivaTypes.StringNode stringNode = GandivaTypes.StringNode.newBuilder()
+ .setValue(ByteString.copyFrom(value.getBytes(charset)))
+ .build();
+
+ return GandivaTypes.TreeNode.newBuilder()
+ .setStringNode(stringNode)
+ .build();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
new file mode 100644
index 000000000..8656e886a
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Contains helper functions for constructing expression trees.
+ */
+public class TreeBuilder {
+ private TreeBuilder() {}
+
+ /**
+ * Helper functions to create literal constants.
+ */
+ public static TreeNode makeLiteral(Boolean booleanConstant) {
+ return new BooleanNode(booleanConstant);
+ }
+
+ public static TreeNode makeLiteral(Float floatConstant) {
+ return new FloatNode(floatConstant);
+ }
+
+ public static TreeNode makeLiteral(Double doubleConstant) {
+ return new DoubleNode(doubleConstant);
+ }
+
+ public static TreeNode makeLiteral(Integer integerConstant) {
+ return new IntNode(integerConstant);
+ }
+
+ public static TreeNode makeLiteral(Long longConstant) {
+ return new LongNode(longConstant);
+ }
+
+ public static TreeNode makeStringLiteral(String stringConstant) {
+ return new StringNode(stringConstant);
+ }
+
+ public static TreeNode makeBinaryLiteral(byte[] binaryConstant) {
+ return new BinaryNode(binaryConstant);
+ }
+
+ public static TreeNode makeDecimalLiteral(String decimalConstant, int precision, int scale) {
+ return new DecimalNode(decimalConstant, precision, scale);
+ }
+
+ /**
+ * create a null literal.
+ */
+ public static TreeNode makeNull(ArrowType type) {
+ return new NullNode(type);
+ }
+
+ /**
+ * Invoke this function to create a node representing a field, e.g. a column name.
+ *
+ * @param field represents the input argument - includes the name and type of the field
+ * @return Node representing a field
+ */
+ public static TreeNode makeField(Field field) {
+ return new FieldNode(field);
+ }
+
+ /**
+ * Invoke this function to create a node representing a function.
+ *
+ * @param function Name of the function, e.g. add
+ * @param children The arguments to the function
+ * @param retType The type of the return value of the operator
+ * @return Node representing a function
+ */
+ public static TreeNode makeFunction(String function,
+ List<TreeNode> children,
+ ArrowType retType) {
+ return new FunctionNode(function, children, retType);
+ }
+
+ /**
+ * Invoke this function to create a node representing an if-clause.
+ *
+ * @param condition Node representing the condition
+ * @param thenNode Node representing the if-block
+ * @param elseNode Node representing the else-block
+ * @param retType Return type of the node
+ * @return Node representing an if-clause
+ */
+ public static TreeNode makeIf(TreeNode condition,
+ TreeNode thenNode,
+ TreeNode elseNode,
+ ArrowType retType) {
+ return new IfNode(condition, thenNode, elseNode, retType);
+ }
+
+ /**
+ * Invoke this function to create a node representing an and-clause.
+ *
+ * @param nodes Nodes in the 'and' clause.
+ * @return Node representing an and-clause
+ */
+ public static TreeNode makeAnd(List<TreeNode> nodes) {
+ return new AndNode(nodes);
+ }
+
+ /**
+ * Invoke this function to create a node representing an or-clause.
+ *
+ * @param nodes Nodes in the 'or' clause.
+ * @return Node representing an or-clause
+ */
+ public static TreeNode makeOr(List<TreeNode> nodes) {
+ return new OrNode(nodes);
+ }
+
+ /**
+ * Invoke this function to create an expression tree.
+ *
+ * @param root is returned by a call to MakeField, MakeFunction, or MakeIf
+ * @param resultField represents the return value of the expression
+ * @return ExpressionTree referring to the root of an expression tree
+ */
+ public static ExpressionTree makeExpression(TreeNode root,
+ Field resultField) {
+ return new ExpressionTree(root, resultField);
+ }
+
+ /**
+ * Short cut to create an expression tree involving a single function, e.g. a+b+c.
+ *
+ * @param function Name of the function, e.g. add()
+ * @param inFields In arguments to the function
+ * @param resultField represents the return value of the expression
+ * @return ExpressionTree referring to the root of an expression tree
+ */
+ public static ExpressionTree makeExpression(String function,
+ List<Field> inFields,
+ Field resultField) {
+ List<TreeNode> children = new ArrayList<TreeNode>(inFields.size());
+ for (Field field : inFields) {
+ children.add(makeField(field));
+ }
+
+ TreeNode root = makeFunction(function, children, resultField.getType());
+ return makeExpression(root, resultField);
+ }
+
+ /**
+ * Invoke this function to create a condition.
+ *
+ * @param root is returned by a call to MakeField, MakeFunction, MakeIf, ..
+ * @return condition referring to the root of an expression tree
+ */
+ public static Condition makeCondition(TreeNode root) {
+ return new Condition(root);
+ }
+
+ /**
+ * Short cut to create an expression tree involving a single function, e.g. a+b+c.
+ *
+ * @param function Name of the function, e.g. add()
+ * @param inFields In arguments to the function
+ * @return condition referring to the root of an expression tree
+ */
+ public static Condition makeCondition(String function,
+ List<Field> inFields) {
+ List<TreeNode> children = new ArrayList<>(inFields.size());
+ for (Field field : inFields) {
+ children.add(makeField(field));
+ }
+
+ TreeNode root = makeFunction(function, children, new ArrowType.Bool());
+ return makeCondition(root);
+ }
+
+ public static TreeNode makeInExpressionInt32(TreeNode resultNode,
+ Set<Integer> intValues) {
+ return InNode.makeIntInExpr(resultNode, intValues);
+ }
+
+ public static TreeNode makeInExpressionBigInt(TreeNode resultNode,
+ Set<Long> longValues) {
+ return InNode.makeLongInExpr(resultNode, longValues);
+ }
+
+ public static TreeNode makeInExpressionDecimal(TreeNode resultNode,
+ Set<BigDecimal> decimalValues, Integer precision, Integer scale) {
+ return InNode.makeDecimalInExpr(resultNode, decimalValues, precision, scale);
+ }
+
+ public static TreeNode makeInExpressionFloat(TreeNode resultNode,
+ Set<Float> floatValues) {
+ return InNode.makeFloatInExpr(resultNode, floatValues);
+ }
+
+ public static TreeNode makeInExpressionDouble(TreeNode resultNode,
+ Set<Double> doubleValues) {
+ return InNode.makeDoubleInExpr(resultNode, doubleValues);
+ }
+
+ public static TreeNode makeInExpressionString(TreeNode resultNode,
+ Set<String> stringValues) {
+ return InNode.makeStringInExpr(resultNode, stringValues);
+ }
+
+ public static TreeNode makeInExpressionBinary(TreeNode resultNode,
+ Set<byte[]> binaryValues) {
+ return InNode.makeBinaryInExpr(resultNode, binaryValues);
+ }
+}
diff --git a/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeNode.java b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeNode.java
new file mode 100644
index 000000000..b8d70d6e7
--- /dev/null
+++ b/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeNode.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+
+/**
+ * Defines an internal node in the expression tree.
+ */
+public interface TreeNode {
+ /**
+ * Converts a TreeNode into a protobuf.
+ *
+ * @return A treenode protobuf
+ * @throws GandivaException in case the TreeNode cannot be processed
+ */
+ GandivaTypes.TreeNode toProtobuf() throws GandivaException;
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java
new file mode 100644
index 000000000..4a36c0405
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java
@@ -0,0 +1,404 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import java.math.BigDecimal;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.expression.Condition;
+import org.apache.arrow.gandiva.expression.ExpressionTree;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+
+class BaseEvaluatorTest {
+
+ interface BaseEvaluator {
+
+ void evaluate(ArrowRecordBatch recordBatch, BufferAllocator allocator) throws GandivaException;
+
+ long getElapsedMillis();
+ }
+
+ class ProjectEvaluator implements BaseEvaluator {
+
+ private Projector projector;
+ private DataAndVectorGenerator generator;
+ private int numExprs;
+ private int maxRowsInBatch;
+ private long elapsedTime = 0;
+ private List<ValueVector> outputVectors = new ArrayList<>();
+
+ public ProjectEvaluator(Projector projector,
+ DataAndVectorGenerator generator,
+ int numExprs,
+ int maxRowsInBatch) {
+ this.projector = projector;
+ this.generator = generator;
+ this.numExprs = numExprs;
+ this.maxRowsInBatch = maxRowsInBatch;
+ }
+
+ @Override
+ public void evaluate(ArrowRecordBatch recordBatch,
+ BufferAllocator allocator) throws GandivaException {
+ // set up output vectors
+ // for each expression, generate the output vector
+ for (int i = 0; i < numExprs; i++) {
+ ValueVector valueVector = generator.generateOutputVector(maxRowsInBatch);
+ outputVectors.add(valueVector);
+ }
+
+ try {
+ long start = System.nanoTime();
+ projector.evaluate(recordBatch, outputVectors);
+ long finish = System.nanoTime();
+ elapsedTime += (finish - start);
+ } finally {
+ for (ValueVector valueVector : outputVectors) {
+ valueVector.close();
+ }
+ }
+ outputVectors.clear();
+ }
+
+ @Override
+ public long getElapsedMillis() {
+ return TimeUnit.NANOSECONDS.toMillis(elapsedTime);
+ }
+ }
+
+ class FilterEvaluator implements BaseEvaluator {
+
+ private Filter filter;
+ private long elapsedTime = 0;
+
+ public FilterEvaluator(Filter filter) {
+ this.filter = filter;
+ }
+
+ @Override
+ public void evaluate(ArrowRecordBatch recordBatch,
+ BufferAllocator allocator) throws GandivaException {
+ ArrowBuf selectionBuffer = allocator.buffer(recordBatch.getLength() * 2);
+ SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer);
+
+ try {
+ long start = System.nanoTime();
+ filter.evaluate(recordBatch, selectionVector);
+ long finish = System.nanoTime();
+ elapsedTime += (finish - start);
+ } finally {
+ selectionBuffer.close();
+ }
+ }
+
+ @Override
+ public long getElapsedMillis() {
+ return TimeUnit.NANOSECONDS.toMillis(elapsedTime);
+ }
+ }
+
+ interface DataAndVectorGenerator {
+
+ void writeData(ArrowBuf buffer);
+
+ ValueVector generateOutputVector(int numRowsInBatch);
+ }
+
+ class Int32DataAndVectorGenerator implements DataAndVectorGenerator {
+
+ protected final BufferAllocator allocator;
+ protected final Random rand;
+
+ Int32DataAndVectorGenerator(BufferAllocator allocator) {
+ this.allocator = allocator;
+ this.rand = new Random();
+ }
+
+ @Override
+ public void writeData(ArrowBuf buffer) {
+ buffer.writeInt(rand.nextInt());
+ }
+
+ @Override
+ public ValueVector generateOutputVector(int numRowsInBatch) {
+ IntVector intVector = new IntVector(BaseEvaluatorTest.EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRowsInBatch);
+ return intVector;
+ }
+ }
+
+ class BoundedInt32DataAndVectorGenerator extends Int32DataAndVectorGenerator {
+
+ private final int upperBound;
+
+ BoundedInt32DataAndVectorGenerator(BufferAllocator allocator, int upperBound) {
+ super(allocator);
+ this.upperBound = upperBound;
+ }
+
+ @Override
+ public void writeData(ArrowBuf buffer) {
+ buffer.writeInt(rand.nextInt(upperBound));
+ }
+ }
+
+ protected static final int THOUSAND = 1000;
+ protected static final int MILLION = THOUSAND * THOUSAND;
+
+ protected static final String EMPTY_SCHEMA_PATH = "";
+
+ protected BufferAllocator allocator;
+ protected ArrowType boolType;
+ protected ArrowType int8;
+ protected ArrowType int32;
+ protected ArrowType int64;
+ protected ArrowType float64;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ boolType = new ArrowType.Bool();
+ int8 = new ArrowType.Int(8, true);
+ int32 = new ArrowType.Int(32, true);
+ int64 = new ArrowType.Int(64, true);
+ float64 = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
+ }
+
+ @After
+ public void tearDown() {
+ allocator.close();
+ }
+
+ ArrowBuf buf(int length) {
+ ArrowBuf buffer = allocator.buffer(length);
+ return buffer;
+ }
+
+ ArrowBuf buf(byte[] bytes) {
+ ArrowBuf buffer = allocator.buffer(bytes.length);
+ buffer.writeBytes(bytes);
+ return buffer;
+ }
+
+ ArrowBuf arrowBufWithAllValid(int size) {
+ int bufLen = (size + 7) / 8;
+ ArrowBuf buffer = allocator.buffer(bufLen);
+ for (int i = 0; i < bufLen; i++) {
+ buffer.writeByte(255);
+ }
+
+ return buffer;
+ }
+
+ ArrowBuf intBuf(int[] ints) {
+ ArrowBuf buffer = allocator.buffer(ints.length * 4);
+ for (int i = 0; i < ints.length; i++) {
+ buffer.writeInt(ints[i]);
+ }
+ return buffer;
+ }
+
+ DecimalVector decimalVector(String[] values, int precision, int scale) {
+ DecimalVector vector = new DecimalVector("decimal" + Math.random(), allocator, precision, scale);
+ vector.allocateNew();
+ for (int i = 0; i < values.length; i++) {
+ BigDecimal decimal = new BigDecimal(values[i]).setScale(scale);
+ vector.setSafe(i, decimal);
+ }
+
+ vector.setValueCount(values.length);
+ return vector;
+ }
+
+ Set decimalSet(String[] values, Integer scale) {
+ Set<BigDecimal> decimalSet = new HashSet<>();
+ for (int i = 0; i < values.length; i++) {
+ decimalSet.add(new BigDecimal(values[i]).setScale(scale));
+ }
+
+ return decimalSet;
+ }
+
+ VarCharVector varcharVector(String[] values) {
+ VarCharVector vector = new VarCharVector("VarCharVector" + Math.random(), allocator);
+ vector.allocateNew();
+ for (int i = 0; i < values.length; i++) {
+ vector.setSafe(i, values[i].getBytes(), 0, values[i].length());
+ }
+
+ vector.setValueCount(values.length);
+ return vector;
+ }
+
+ ArrowBuf longBuf(long[] longs) {
+ ArrowBuf buffer = allocator.buffer(longs.length * 8);
+ for (int i = 0; i < longs.length; i++) {
+ buffer.writeLong(longs[i]);
+ }
+ return buffer;
+ }
+
+ ArrowBuf doubleBuf(double[] data) {
+ ArrowBuf buffer = allocator.buffer(data.length * 8);
+ for (int i = 0; i < data.length; i++) {
+ buffer.writeDouble(data[i]);
+ }
+
+ return buffer;
+ }
+
+ ArrowBuf stringToMillis(String[] dates) {
+ ArrowBuf buffer = allocator.buffer(dates.length * 8);
+ for (int i = 0; i < dates.length; i++) {
+ Instant instant = Instant.parse(dates[i]);
+ buffer.writeLong(instant.toEpochMilli());
+ }
+
+ return buffer;
+ }
+
+ ArrowBuf stringToDayInterval(String[] values) {
+ ArrowBuf buffer = allocator.buffer(values.length * 8);
+ for (int i = 0; i < values.length; i++) {
+ buffer.writeInt(Integer.parseInt(values[i].split(" ")[0])); // days
+ buffer.writeInt(Integer.parseInt(values[i].split(" ")[1])); // millis
+ }
+ return buffer;
+ }
+
+ void releaseRecordBatch(ArrowRecordBatch recordBatch) {
+ // There are 2 references to the buffers
+ // One in the recordBatch - release that by calling close()
+ // One in the allocator - release that explicitly
+ List<ArrowBuf> buffers = recordBatch.getBuffers();
+ recordBatch.close();
+ for (ArrowBuf buf : buffers) {
+ buf.getReferenceManager().release();
+ }
+ }
+
+ void releaseValueVectors(List<ValueVector> valueVectors) {
+ for (ValueVector valueVector : valueVectors) {
+ valueVector.close();
+ }
+ }
+
+ void generateData(DataAndVectorGenerator generator, int numRecords, ArrowBuf buffer) {
+ for (int i = 0; i < numRecords; i++) {
+ generator.writeData(buffer);
+ }
+ }
+
+ private void generateDataAndEvaluate(DataAndVectorGenerator generator,
+ BaseEvaluator evaluator,
+ int numFields,
+ int numRows, int maxRowsInBatch,
+ int inputFieldSize)
+ throws GandivaException, Exception {
+ int numRemaining = numRows;
+ List<ArrowBuf> inputData = new ArrayList<ArrowBuf>();
+ List<ArrowFieldNode> fieldNodes = new ArrayList<ArrowFieldNode>();
+
+ // set the bitmap
+ while (numRemaining > 0) {
+ int numRowsInBatch = maxRowsInBatch;
+ if (numRowsInBatch > numRemaining) {
+ numRowsInBatch = numRemaining;
+ }
+
+ // generate data
+ for (int i = 0; i < numFields; i++) {
+ ArrowBuf buf = allocator.buffer(numRowsInBatch * inputFieldSize);
+ ArrowBuf validity = arrowBufWithAllValid(maxRowsInBatch);
+ generateData(generator, numRowsInBatch, buf);
+
+ fieldNodes.add(new ArrowFieldNode(numRowsInBatch, 0));
+ inputData.add(validity);
+ inputData.add(buf);
+ }
+
+ // create record batch
+ ArrowRecordBatch recordBatch = new ArrowRecordBatch(numRowsInBatch, fieldNodes, inputData);
+
+ evaluator.evaluate(recordBatch, allocator);
+
+ // fix numRemaining
+ numRemaining -= numRowsInBatch;
+
+ // release refs
+ releaseRecordBatch(recordBatch);
+
+ inputData.clear();
+ fieldNodes.clear();
+ }
+ }
+
+ long timedProject(DataAndVectorGenerator generator,
+ Schema schema, List<ExpressionTree> exprs,
+ int numRows, int maxRowsInBatch,
+ int inputFieldSize)
+ throws GandivaException, Exception {
+ Projector projector = Projector.make(schema, exprs);
+ try {
+ ProjectEvaluator evaluator =
+ new ProjectEvaluator(projector, generator, exprs.size(), maxRowsInBatch);
+ generateDataAndEvaluate(generator, evaluator,
+ schema.getFields().size(), numRows, maxRowsInBatch, inputFieldSize);
+ return evaluator.getElapsedMillis();
+ } finally {
+ projector.close();
+ }
+ }
+
+ long timedFilter(DataAndVectorGenerator generator,
+ Schema schema, Condition condition,
+ int numRows, int maxRowsInBatch,
+ int inputFieldSize)
+ throws GandivaException, Exception {
+
+ Filter filter = Filter.make(schema, condition);
+ try {
+ FilterEvaluator evaluator = new FilterEvaluator(filter);
+ generateDataAndEvaluate(generator, evaluator,
+ schema.getFields().size(), numRows, maxRowsInBatch, inputFieldSize);
+ return evaluator.getElapsedMillis();
+ } finally {
+ filter.close();
+ }
+ }
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java
new file mode 100644
index 000000000..fe51c09e3
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class DecimalTypeUtilTest {
+
+ @Test
+ public void testOutputTypesForAdd() {
+ ArrowType.Decimal operand1 = getDecimal(30, 10);
+ ArrowType.Decimal operand2 = getDecimal(30, 10);
+ ArrowType.Decimal resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.ADD, operand1, operand2);
+ Assert.assertTrue(getDecimal(31, 10).equals(resultType));
+
+ operand1 = getDecimal(30, 6);
+ operand2 = getDecimal(30, 5);
+ resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.ADD, operand1, operand2);
+ Assert.assertTrue(getDecimal(32, 6).equals(resultType));
+
+ operand1 = getDecimal(30, 10);
+ operand2 = getDecimal(38, 10);
+ resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.ADD, operand1, operand2);
+ Assert.assertTrue(getDecimal(38, 9).equals(resultType));
+
+ operand1 = getDecimal(38, 10);
+ operand2 = getDecimal(38, 38);
+ resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.ADD, operand1, operand2);
+ Assert.assertTrue(getDecimal(38, 9).equals(resultType));
+
+ operand1 = getDecimal(38, 10);
+ operand2 = getDecimal(38, 2);
+ resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.ADD, operand1, operand2);
+ Assert.assertTrue(getDecimal(38, 6).equals(resultType));
+
+ }
+
+ @Test
+ public void testOutputTypesForMultiply() {
+ ArrowType.Decimal operand1 = getDecimal(30, 10);
+ ArrowType.Decimal operand2 = getDecimal(30, 10);
+ ArrowType.Decimal resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.MULTIPLY, operand1, operand2);
+ Assert.assertTrue(getDecimal(38, 6).equals(resultType));
+
+ operand1 = getDecimal(38, 10);
+ operand2 = getDecimal(9, 2);
+ resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.MULTIPLY, operand1, operand2);
+ Assert.assertTrue(getDecimal(38, 6).equals(resultType));
+
+ }
+
+ @Test
+ public void testOutputTypesForMod() {
+ ArrowType.Decimal operand1 = getDecimal(30, 10);
+ ArrowType.Decimal operand2 = getDecimal(28, 7);
+ ArrowType.Decimal resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.MOD, operand1, operand2);
+ Assert.assertTrue(getDecimal(30, 10).equals(resultType));
+ }
+
+ private ArrowType.Decimal getDecimal(int precision, int scale) {
+ return new ArrowType.Decimal(precision, scale, 128);
+ }
+
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java
new file mode 100644
index 000000000..a51ac09ba
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import java.util.Set;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class ExpressionRegistryTest {
+
+ @Test
+ public void testTypes() throws GandivaException {
+ Set<ArrowType> types = ExpressionRegistry.getInstance().getSupportedTypes();
+ ArrowType.Int uint8 = new ArrowType.Int(8, false);
+ Assert.assertTrue(types.contains(uint8));
+ }
+
+ @Test
+ public void testFunctions() throws GandivaException {
+ ArrowType.Int uint8 = new ArrowType.Int(8, false);
+ FunctionSignature signature =
+ new FunctionSignature("add", uint8, Lists.newArrayList(uint8, uint8));
+ Set<FunctionSignature> functions = ExpressionRegistry.getInstance().getSupportedFunctions();
+ Assert.assertTrue(functions.contains(signature));
+ }
+
+ @Test
+ public void testFunctionAliases() throws GandivaException {
+ ArrowType.Int int64 = new ArrowType.Int(64, true);
+ FunctionSignature signature =
+ new FunctionSignature("modulo", int64, Lists.newArrayList(int64, int64));
+ Set<FunctionSignature> functions = ExpressionRegistry.getInstance().getSupportedFunctions();
+ Assert.assertTrue(functions.contains(signature));
+ }
+
+ @Test
+ public void testCaseInsensitiveFunctionName() throws GandivaException {
+ ArrowType.Utf8 utf8 = new ArrowType.Utf8();
+ ArrowType.Int int64 = new ArrowType.Int(64, true);
+ FunctionSignature signature =
+ new FunctionSignature("castvarchar", utf8, Lists.newArrayList(utf8, int64));
+ Set<FunctionSignature> functions = ExpressionRegistry.getInstance().getSupportedFunctions();
+ Assert.assertTrue(functions.contains(signature));
+ }
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java
new file mode 100644
index 000000000..51fc1c291
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.expression.Condition;
+import org.apache.arrow.gandiva.expression.ExpressionTree;
+import org.apache.arrow.gandiva.expression.TreeBuilder;
+import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class FilterProjectTest extends BaseEvaluatorTest {
+
+ @Test
+ public void testSimpleSV16() throws GandivaException, Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ Field c = Field.nullable("c", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Condition condition = TreeBuilder.makeCondition("less_than", args);
+
+ Schema schema = new Schema(args);
+ Filter filter = Filter.make(schema, condition);
+
+ ExpressionTree expression = TreeBuilder.makeExpression("add", Lists.newArrayList(a, b), c);
+ Projector projector = Projector.make(schema, Lists.newArrayList(expression), SelectionVectorType.SV_INT16);
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // second half is "undefined"
+ int[] aValues = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] bValues = new int[]{2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 14, 15};
+ int[] expected = {3, 7, 11, 15};
+
+ verifyTestCaseFor16(filter, projector, numRows, validity, aValues, bValues, expected);
+ }
+
+ private void verifyTestCaseFor16(Filter filter, Projector projector, int numRows, byte[] validity,
+ int[] aValues, int[] bValues, int[] expected) throws GandivaException {
+ ArrowBuf validitya = buf(validity);
+ ArrowBuf valuesa = intBuf(aValues);
+ ArrowBuf validityb = buf(validity);
+ ArrowBuf valuesb = intBuf(bValues);
+ ArrowRecordBatch batch = new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(validitya, valuesa, validityb, valuesb));
+
+ ArrowBuf selectionBuffer = buf(numRows * 2);
+ SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer);
+
+ filter.evaluate(batch, selectionVector);
+
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(selectionVector.getRecordCount());
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(intVector);
+ projector.evaluate(batch, selectionVector, output);
+ for (int i = 0; i < selectionVector.getRecordCount(); i++) {
+ assertFalse(intVector.isNull(i));
+ assertEquals(expected[i], intVector.get(i));
+ }
+ // free buffers
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ selectionBuffer.close();
+ filter.close();
+ projector.close();
+ }
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterTest.java
new file mode 100644
index 000000000..ed6e43cd6
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterTest.java
@@ -0,0 +1,315 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.IntStream;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.expression.Condition;
+import org.apache.arrow.gandiva.expression.TreeBuilder;
+import org.apache.arrow.gandiva.expression.TreeNode;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public class FilterTest extends BaseEvaluatorTest {
+
+ private int[] selectionVectorToArray(SelectionVector vector) {
+ int[] actual = new int[vector.getRecordCount()];
+ for (int i = 0; i < vector.getRecordCount(); ++i) {
+ actual[i] = vector.getIndex(i);
+ }
+ return actual;
+ }
+
+ private Charset utf8Charset = Charset.forName("UTF-8");
+ private Charset utf16Charset = Charset.forName("UTF-16");
+
+ List<ArrowBuf> varBufs(String[] strings, Charset charset) {
+ ArrowBuf offsetsBuffer = allocator.buffer((strings.length + 1) * 4);
+ ArrowBuf dataBuffer = allocator.buffer(strings.length * 8);
+
+ int startOffset = 0;
+ for (int i = 0; i < strings.length; i++) {
+ offsetsBuffer.writeInt(startOffset);
+
+ final byte[] bytes = strings[i].getBytes(charset);
+ dataBuffer = dataBuffer.reallocIfNeeded(dataBuffer.writerIndex() + bytes.length);
+ dataBuffer.setBytes(startOffset, bytes, 0, bytes.length);
+ startOffset += bytes.length;
+ }
+ offsetsBuffer.writeInt(startOffset); // offset for the last element
+
+ return Arrays.asList(offsetsBuffer, dataBuffer);
+ }
+
+ List<ArrowBuf> stringBufs(String[] strings) {
+ return varBufs(strings, utf8Charset);
+ }
+
+ @Test
+ public void testSimpleInString() throws GandivaException, Exception {
+ Field c1 = Field.nullable("c1", new ArrowType.Utf8());
+ TreeNode l1 = TreeBuilder.makeLiteral(1L);
+ TreeNode l2 = TreeBuilder.makeLiteral(3L);
+
+ List<Field> argsSchema = Lists.newArrayList(c1);
+ List<TreeNode> args = Lists.newArrayList(TreeBuilder.makeField(c1), l1, l2);
+ TreeNode substr = TreeBuilder.makeFunction("substr", args, new ArrowType.Utf8());
+ TreeNode inExpr =
+ TreeBuilder.makeInExpressionString(substr, Sets.newHashSet("one", "two", "thr", "fou"));
+
+ Condition condition = TreeBuilder.makeCondition(inExpr);
+
+ Schema schema = new Schema(argsSchema);
+ Filter filter = Filter.make(schema, condition);
+
+ int numRows = 16;
+ byte[] validity = new byte[] {(byte) 255, 0};
+ // second half is "undefined"
+ String[] c1Values = new String[]{"one", "two", "three", "four", "five", "six", "seven",
+ "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+ "sixteen"};
+ int[] expected = {0, 1, 2, 3};
+ ArrowBuf c1Validity = buf(validity);
+ ArrowBuf c2Validity = buf(validity);
+ List<ArrowBuf> dataBufsX = stringBufs(c1Values);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(c1Validity, dataBufsX.get(0), dataBufsX.get(1), c2Validity));
+
+ ArrowBuf selectionBuffer = buf(numRows * 2);
+ SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer);
+
+ filter.evaluate(batch, selectionVector);
+
+ int[] actual = selectionVectorToArray(selectionVector);
+ releaseRecordBatch(batch);
+ selectionBuffer.close();
+ filter.close();
+ Assert.assertArrayEquals(expected, actual);
+ }
+
+ @Test
+ public void testSimpleInInt() throws GandivaException, Exception {
+ Field c1 = Field.nullable("c1", int32);
+
+ List<Field> argsSchema = Lists.newArrayList(c1);
+ TreeNode inExpr =
+ TreeBuilder.makeInExpressionInt32(TreeBuilder.makeField(c1), Sets.newHashSet(1, 2, 3, 4));
+
+ Condition condition = TreeBuilder.makeCondition(inExpr);
+
+ Schema schema = new Schema(argsSchema);
+ Filter filter = Filter.make(schema, condition);
+
+ int numRows = 16;
+ byte[] validity = new byte[] {(byte) 255, 0};
+ // second half is "undefined"
+ int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] expected = {0, 1, 2, 3};
+
+ ArrowBuf validitya = buf(validity);
+ ArrowBuf validityb = buf(validity);
+ ArrowBuf valuesa = intBuf(aValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(validitya, valuesa, validityb));
+
+ ArrowBuf selectionBuffer = buf(numRows * 2);
+ SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer);
+
+ filter.evaluate(batch, selectionVector);
+
+ // free buffers
+ int[] actual = selectionVectorToArray(selectionVector);
+ releaseRecordBatch(batch);
+ selectionBuffer.close();
+ filter.close();
+ Assert.assertArrayEquals(expected, actual);
+ }
+
+ @Test
+ public void testSimpleSV16() throws GandivaException, Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Condition condition = TreeBuilder.makeCondition("less_than", args);
+
+ Schema schema = new Schema(args);
+ Filter filter = Filter.make(schema, condition);
+
+ int numRows = 16;
+ byte[] validity = new byte[] {(byte) 255, 0};
+ // second half is "undefined"
+ int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] bValues = new int[] {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 14, 15};
+ int[] expected = {0, 2, 4, 6};
+
+ verifyTestCase(filter, numRows, validity, aValues, bValues, expected);
+ }
+
+ @Test
+ public void testSimpleSV16_AllMatched() throws GandivaException, Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Condition condition = TreeBuilder.makeCondition("less_than", args);
+
+ Schema schema = new Schema(args);
+ Filter filter = Filter.make(schema, condition);
+
+ int numRows = 32;
+
+ byte[] validity = new byte[numRows / 8];
+
+ IntStream.range(0, numRows / 8).forEach(i -> validity[i] = (byte) 255);
+
+ int[] aValues = new int[numRows];
+ IntStream.range(0, numRows).forEach(i -> aValues[i] = i);
+
+ int[] bValues = new int[numRows];
+ IntStream.range(0, numRows).forEach(i -> bValues[i] = i + 1);
+
+ int[] expected = new int[numRows];
+ IntStream.range(0, numRows).forEach(i -> expected[i] = i);
+
+ verifyTestCase(filter, numRows, validity, aValues, bValues, expected);
+ }
+
+ @Test
+ public void testSimpleSV16_GreaterThan64Recs() throws GandivaException, Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Condition condition = TreeBuilder.makeCondition("greater_than", args);
+
+ Schema schema = new Schema(args);
+ Filter filter = Filter.make(schema, condition);
+
+ int numRows = 1000;
+
+ byte[] validity = new byte[numRows / 8];
+
+ IntStream.range(0, numRows / 8).forEach(i -> validity[i] = (byte) 255);
+
+ int[] aValues = new int[numRows];
+ IntStream.range(0, numRows).forEach(i -> aValues[i] = i);
+
+ int[] bValues = new int[numRows];
+ IntStream.range(0, numRows).forEach(i -> bValues[i] = i + 1);
+
+ aValues[0] = 5;
+ bValues[0] = 0;
+
+ int[] expected = {0};
+
+ verifyTestCase(filter, numRows, validity, aValues, bValues, expected);
+ }
+
+ @Test
+ public void testSimpleSV32() throws GandivaException, Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Condition condition = TreeBuilder.makeCondition("less_than", args);
+
+ Schema schema = new Schema(args);
+ Filter filter = Filter.make(schema, condition);
+
+ int numRows = 16;
+ byte[] validity = new byte[] {(byte) 255, 0};
+ // second half is "undefined"
+ int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] bValues = new int[] {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 14, 15};
+ int[] expected = {0, 2, 4, 6};
+
+ verifyTestCase(filter, numRows, validity, aValues, bValues, expected);
+ }
+
+ @Test
+ public void testSimpleFilterWithNoOptimisation() throws GandivaException, Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Condition condition = TreeBuilder.makeCondition("less_than", args);
+
+ Schema schema = new Schema(args);
+ Filter filter = Filter.make(schema, condition, false);
+
+ int numRows = 16;
+ byte[] validity = new byte[] {(byte) 255, 0};
+ // second half is "undefined"
+ int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] bValues = new int[] {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 14, 15};
+ int[] expected = {0, 2, 4, 6};
+
+ verifyTestCase(filter, numRows, validity, aValues, bValues, expected);
+ }
+
+ private void verifyTestCase(
+ Filter filter, int numRows, byte[] validity, int[] aValues, int[] bValues, int[] expected)
+ throws GandivaException {
+ ArrowBuf validitya = buf(validity);
+ ArrowBuf valuesa = intBuf(aValues);
+ ArrowBuf validityb = buf(validity);
+ ArrowBuf valuesb = intBuf(bValues);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(validitya, valuesa, validityb, valuesb));
+
+ ArrowBuf selectionBuffer = buf(numRows * 2);
+ SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer);
+
+ filter.evaluate(batch, selectionVector);
+
+ // free buffers
+ int[] actual = selectionVectorToArray(selectionVector);
+ releaseRecordBatch(batch);
+ selectionBuffer.close();
+ filter.close();
+
+ Assert.assertArrayEquals(expected, actual);
+ }
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java
new file mode 100644
index 000000000..6934c3f9e
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import java.util.List;
+
+import org.apache.arrow.gandiva.expression.Condition;
+import org.apache.arrow.gandiva.expression.ExpressionTree;
+import org.apache.arrow.gandiva.expression.TreeBuilder;
+import org.apache.arrow.gandiva.expression.TreeNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+@Ignore
+public class MicroBenchmarkTest extends BaseEvaluatorTest {
+
+ private double toleranceRatio = 4.0;
+
+ @Test
+ public void testAdd3() throws Exception {
+ Field x = Field.nullable("x", int32);
+ Field n2x = Field.nullable("n2x", int32);
+ Field n3x = Field.nullable("n3x", int32);
+
+ // x + n2x + n3x
+ TreeNode add1 =
+ TreeBuilder.makeFunction(
+ "add", Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeField(n2x)), int32);
+ TreeNode add =
+ TreeBuilder.makeFunction(
+ "add", Lists.newArrayList(add1, TreeBuilder.makeField(n3x)), int32);
+ ExpressionTree expr = TreeBuilder.makeExpression(add, x);
+
+ List<Field> cols = Lists.newArrayList(x, n2x, n3x);
+ Schema schema = new Schema(cols);
+
+ long timeTaken = timedProject(new Int32DataAndVectorGenerator(allocator),
+ schema,
+ Lists.newArrayList(expr),
+ 1 * MILLION, 16 * THOUSAND,
+ 4);
+ System.out.println("Time taken for projecting 1m records of add3 is " + timeTaken + "ms");
+ Assert.assertTrue(timeTaken <= 13 * toleranceRatio);
+ }
+
+ @Test
+ public void testIf() throws Exception {
+ /*
+ * when x < 10 then 0
+ * when x < 20 then 1
+ * when x < 30 then 2
+ * when x < 40 then 3
+ * when x < 50 then 4
+ * when x < 60 then 5
+ * when x < 70 then 6
+ * when x < 80 then 7
+ * when x < 90 then 8
+ * when x < 100 then 9
+ * when x < 110 then 10
+ * when x < 120 then 11
+ * when x < 130 then 12
+ * when x < 140 then 13
+ * when x < 150 then 14
+ * when x < 160 then 15
+ * when x < 170 then 16
+ * when x < 180 then 17
+ * when x < 190 then 18
+ * when x < 200 then 19
+ * else 20
+ */
+ Field x = Field.nullable("x", int32);
+ TreeNode xNode = TreeBuilder.makeField(x);
+
+ // if (x < 100) then 9 else 10
+ int returnValue = 20;
+ TreeNode topNode = TreeBuilder.makeLiteral(returnValue);
+ int compareWith = 200;
+ while (compareWith >= 10) {
+ // cond (x < compareWith)
+ TreeNode condNode =
+ TreeBuilder.makeFunction(
+ "less_than",
+ Lists.newArrayList(xNode, TreeBuilder.makeLiteral(compareWith)),
+ boolType);
+ topNode =
+ TreeBuilder.makeIf(
+ condNode, // cond (x < compareWith)
+ TreeBuilder.makeLiteral(returnValue), // then returnValue
+ topNode, // else topNode
+ int32);
+ compareWith -= 10;
+ returnValue--;
+ }
+
+ ExpressionTree expr = TreeBuilder.makeExpression(topNode, x);
+ Schema schema = new Schema(Lists.newArrayList(x));
+
+ long timeTaken = timedProject(new BoundedInt32DataAndVectorGenerator(allocator, 250),
+ schema,
+ Lists.newArrayList(expr),
+ 1 * MILLION, 16 * THOUSAND,
+ 4);
+ System.out.println("Time taken for projecting 10m records of nestedIf is " + timeTaken + "ms");
+ Assert.assertTrue(timeTaken <= 15 * toleranceRatio);
+ }
+
+ @Test
+ public void testFilterAdd2() throws Exception {
+ Field x = Field.nullable("x", int32);
+ Field n2x = Field.nullable("n2x", int32);
+ Field n3x = Field.nullable("n3x", int32);
+
+ // x + n2x < n3x
+ TreeNode add = TreeBuilder.makeFunction("add",
+ Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeField(n2x)), int32);
+ TreeNode lessThan = TreeBuilder
+ .makeFunction("less_than", Lists.newArrayList(add, TreeBuilder.makeField(n3x)), boolType);
+ Condition condition = TreeBuilder.makeCondition(lessThan);
+
+ List<Field> cols = Lists.newArrayList(x, n2x, n3x);
+ Schema schema = new Schema(cols);
+
+ long timeTaken = timedFilter(new Int32DataAndVectorGenerator(allocator),
+ schema,
+ condition,
+ 1 * MILLION, 16 * THOUSAND,
+ 4);
+ System.out.println("Time taken for filtering 10m records of a+b<c is " + timeTaken + "ms");
+ Assert.assertTrue(timeTaken <= 12 * toleranceRatio);
+ }
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java
new file mode 100644
index 000000000..28a57c9f8
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java
@@ -0,0 +1,797 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.expression.ExpressionTree;
+import org.apache.arrow.gandiva.expression.TreeBuilder;
+import org.apache.arrow.gandiva.expression.TreeNode;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import com.google.common.collect.Lists;
+
+public class ProjectorDecimalTest extends org.apache.arrow.gandiva.evaluator.BaseEvaluatorTest {
+ @Rule
+ public ExpectedException exception = ExpectedException.none();
+
+ @Test
+ public void test_add() throws GandivaException {
+ int precision = 38;
+ int scale = 8;
+ ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale, 128);
+ Field a = Field.nullable("a", decimal);
+ Field b = Field.nullable("b", decimal);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ ArrowType.Decimal outputType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.ADD, decimal, decimal);
+ Field retType = Field.nullable("c", outputType);
+ ExpressionTree root = TreeBuilder.makeExpression("add", args, retType);
+
+ List<ExpressionTree> exprs = Lists.newArrayList(root);
+
+ Schema schema = new Schema(args);
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 4;
+ byte[] validity = new byte[]{(byte) 255};
+ String[] aValues = new String[]{"1.12345678", "2.12345678", "3.12345678", "4.12345678"};
+ String[] bValues = new String[]{"2.12345678", "3.12345678", "4.12345678", "5.12345678"};
+
+ DecimalVector valuesa = decimalVector(aValues, precision, scale);
+ DecimalVector valuesb = decimalVector(bValues, precision, scale);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer(),
+ valuesb.getValidityBuffer(), valuesb.getDataBuffer()));
+
+ DecimalVector outVector = new DecimalVector("decimal_output", allocator, outputType.getPrecision(),
+ outputType.getScale());
+ outVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(outVector);
+ eval.evaluate(batch, output);
+
+ // should have scaled down.
+ BigDecimal[] expOutput = new BigDecimal[]{BigDecimal.valueOf(3.2469136),
+ BigDecimal.valueOf(5.2469136),
+ BigDecimal.valueOf(7.2469136),
+ BigDecimal.valueOf(9.2469136)};
+
+ for (int i = 0; i < 4; i++) {
+ assertFalse(outVector.isNull(i));
+ assertTrue("index : " + i + " failed compare", expOutput[i].compareTo(outVector.getObject(i)
+ ) == 0);
+ }
+
+ // free buffers
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void test_add_literal() throws GandivaException {
+ int precision = 2;
+ int scale = 0;
+ ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale, 128);
+ ArrowType.Decimal literalType = new ArrowType.Decimal(2, 1, 128);
+ Field a = Field.nullable("a", decimal);
+
+ ArrowType.Decimal outputType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.ADD, decimal, literalType);
+ Field retType = Field.nullable("c", outputType);
+ TreeNode field = TreeBuilder.makeField(a);
+ TreeNode literal = TreeBuilder.makeDecimalLiteral("6", 2, 1);
+ List<TreeNode> args = Lists.newArrayList(field, literal);
+ TreeNode root = TreeBuilder.makeFunction("add", args, outputType);
+ ExpressionTree tree = TreeBuilder.makeExpression(root, retType);
+
+ List<ExpressionTree> exprs = Lists.newArrayList(tree);
+
+ Schema schema = new Schema(Lists.newArrayList(a));
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 4;
+ String[] aValues = new String[]{"1", "2", "3", "4"};
+
+ DecimalVector valuesa = decimalVector(aValues, precision, scale);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer()));
+
+ DecimalVector outVector = new DecimalVector("decimal_output", allocator, outputType.getPrecision(),
+ outputType.getScale());
+ outVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(outVector);
+ eval.evaluate(batch, output);
+
+ BigDecimal[] expOutput = new BigDecimal[]{BigDecimal.valueOf(1.6), BigDecimal.valueOf(2.6),
+ BigDecimal.valueOf(3.6), BigDecimal.valueOf(4.6)};
+
+ for (int i = 0; i < 4; i++) {
+ assertFalse(outVector.isNull(i));
+ assertTrue(expOutput[i].compareTo(outVector.getObject(i)) == 0);
+ }
+
+ // free buffers
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void test_multiply() throws GandivaException {
+ int precision = 38;
+ int scale = 8;
+ ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale, 128);
+ Field a = Field.nullable("a", decimal);
+ Field b = Field.nullable("b", decimal);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ ArrowType.Decimal outputType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil
+ .OperationType.MULTIPLY, decimal, decimal);
+ Field retType = Field.nullable("c", outputType);
+ ExpressionTree root = TreeBuilder.makeExpression("multiply", args, retType);
+
+ List<ExpressionTree> exprs = Lists.newArrayList(root);
+
+ Schema schema = new Schema(args);
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 4;
+ byte[] validity = new byte[]{(byte) 255};
+ String[] aValues = new String[]{"1.12345678", "2.12345678", "3.12345678", "999999999999.99999999"};
+ String[] bValues = new String[]{"2.12345678", "3.12345678", "4.12345678", "999999999999.99999999"};
+
+ DecimalVector valuesa = decimalVector(aValues, precision, scale);
+ DecimalVector valuesb = decimalVector(bValues, precision, scale);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer(),
+ valuesb.getValidityBuffer(), valuesb.getDataBuffer()));
+
+ DecimalVector outVector = new DecimalVector("decimal_output", allocator, outputType.getPrecision(),
+ outputType.getScale());
+ outVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(outVector);
+ eval.evaluate(batch, output);
+
+ // should have scaled down.
+ BigDecimal[] expOutput = new BigDecimal[]{BigDecimal.valueOf(2.385612),
+ BigDecimal.valueOf(6.632525),
+ BigDecimal.valueOf(12.879439),
+ new BigDecimal("999999999999999999980000.000000")};
+
+ for (int i = 0; i < 4; i++) {
+ assertFalse(outVector.isNull(i));
+ assertTrue("index : " + i + " failed compare", expOutput[i].compareTo(outVector.getObject(i)
+ ) == 0);
+ }
+
+ // free buffers
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testCompare() throws GandivaException {
+ Decimal aType = new Decimal(38, 3, 128);
+ Decimal bType = new Decimal(38, 2, 128);
+ Field a = Field.nullable("a", aType);
+ Field b = Field.nullable("b", bType);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ List<ExpressionTree> exprs = new ArrayList<>(
+ Arrays.asList(
+ TreeBuilder.makeExpression("equal", args, Field.nullable("eq", boolType)),
+ TreeBuilder.makeExpression("not_equal", args, Field.nullable("ne", boolType)),
+ TreeBuilder.makeExpression("less_than", args, Field.nullable("lt", boolType)),
+ TreeBuilder.makeExpression("less_than_or_equal_to", args, Field.nullable("le", boolType)),
+ TreeBuilder.makeExpression("greater_than", args, Field.nullable("gt", boolType)),
+ TreeBuilder.makeExpression("greater_than_or_equal_to", args, Field.nullable("ge", boolType))
+ )
+ );
+
+ Schema schema = new Schema(args);
+ Projector eval = Projector.make(schema, exprs);
+
+ List<ValueVector> output = null;
+ ArrowRecordBatch batch = null;
+ try {
+ int numRows = 4;
+ String[] aValues = new String[]{"7.620", "2.380", "3.860", "-18.160"};
+ String[] bValues = new String[]{"7.62", "3.50", "1.90", "-1.45"};
+
+ DecimalVector valuesa = decimalVector(aValues, aType.getPrecision(), aType.getScale());
+ DecimalVector valuesb = decimalVector(bValues, bType.getPrecision(), bType.getScale());
+ batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer(),
+ valuesb.getValidityBuffer(), valuesb.getDataBuffer()));
+
+ // expected results.
+ boolean[][] expected = {
+ {true, false, false, false}, // eq
+ {false, true, true, true}, // ne
+ {false, true, false, true}, // lt
+ {true, true, false, true}, // le
+ {false, false, true, false}, // gt
+ {true, false, true, false}, // ge
+ };
+
+ // Allocate output vectors.
+ output = new ArrayList<>(
+ Arrays.asList(
+ new BitVector("eq", allocator),
+ new BitVector("ne", allocator),
+ new BitVector("lt", allocator),
+ new BitVector("le", allocator),
+ new BitVector("gt", allocator),
+ new BitVector("ge", allocator)
+ )
+ );
+ for (ValueVector v : output) {
+ v.allocateNew();
+ }
+
+ // evaluate expressions.
+ eval.evaluate(batch, output);
+
+ // compare the outputs.
+ for (int idx = 0; idx < output.size(); ++idx) {
+ boolean[] expectedArray = expected[idx];
+ BitVector resultVector = (BitVector) output.get(idx);
+
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(resultVector.isNull(i));
+ assertEquals("mismatch in result for expr at idx " + idx + " for row " + i,
+ expectedArray[i], resultVector.getObject(i).booleanValue());
+ }
+ }
+ } finally {
+ // free buffers
+ if (batch != null) {
+ releaseRecordBatch(batch);
+ }
+ if (output != null) {
+ releaseValueVectors(output);
+ }
+ eval.close();
+ }
+ }
+
+ @Test
+ public void testRound() throws GandivaException {
+ Decimal aType = new Decimal(38, 2, 128);
+ Decimal aWithScaleZero = new Decimal(38, 0, 128);
+ Decimal aWithScaleOne = new Decimal(38, 1, 128);
+ Field a = Field.nullable("a", aType);
+ List<Field> args = Lists.newArrayList(a);
+
+ List<ExpressionTree> exprs = new ArrayList<>(
+ Arrays.asList(
+ TreeBuilder.makeExpression("abs", args, Field.nullable("abs", aType)),
+ TreeBuilder.makeExpression("ceil", args, Field.nullable("ceil", aWithScaleZero)),
+ TreeBuilder.makeExpression("floor", args, Field.nullable("floor", aWithScaleZero)),
+ TreeBuilder.makeExpression("round", args, Field.nullable("round", aWithScaleZero)),
+ TreeBuilder.makeExpression("truncate", args, Field.nullable("truncate", aWithScaleZero)),
+ TreeBuilder.makeExpression(
+ TreeBuilder.makeFunction("round",
+ Lists.newArrayList(TreeBuilder.makeField(a), TreeBuilder.makeLiteral(1)),
+ aWithScaleOne),
+ Field.nullable("round_scale_1", aWithScaleOne)),
+ TreeBuilder.makeExpression(
+ TreeBuilder.makeFunction("truncate",
+ Lists.newArrayList(TreeBuilder.makeField(a), TreeBuilder.makeLiteral(1)),
+ aWithScaleOne),
+ Field.nullable("truncate_scale_1", aWithScaleOne))
+ )
+ );
+
+ Schema schema = new Schema(args);
+ Projector eval = Projector.make(schema, exprs);
+
+ List<ValueVector> output = null;
+ ArrowRecordBatch batch = null;
+ try {
+ int numRows = 4;
+ String[] aValues = new String[]{"1.23", "1.58", "-1.23", "-1.58"};
+
+ DecimalVector valuesa = decimalVector(aValues, aType.getPrecision(), aType.getScale());
+ batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer()));
+
+ // expected results.
+ BigDecimal[][] expected = {
+ {BigDecimal.valueOf(1.23), BigDecimal.valueOf(1.58),
+ BigDecimal.valueOf(1.23), BigDecimal.valueOf(1.58)}, // abs
+ {BigDecimal.valueOf(2), BigDecimal.valueOf(2), BigDecimal.valueOf(-1), BigDecimal.valueOf(-1)}, // ceil
+ {BigDecimal.valueOf(1), BigDecimal.valueOf(1), BigDecimal.valueOf(-2), BigDecimal.valueOf(-2)}, // floor
+ {BigDecimal.valueOf(1), BigDecimal.valueOf(2), BigDecimal.valueOf(-1), BigDecimal.valueOf(-2)}, // round
+ {BigDecimal.valueOf(1), BigDecimal.valueOf(1), BigDecimal.valueOf(-1), BigDecimal.valueOf(-1)}, // truncate
+ {BigDecimal.valueOf(1.2), BigDecimal.valueOf(1.6),
+ BigDecimal.valueOf(-1.2), BigDecimal.valueOf(-1.6)}, // round-to-scale-1
+ {BigDecimal.valueOf(1.2), BigDecimal.valueOf(1.5),
+ BigDecimal.valueOf(-1.2), BigDecimal.valueOf(-1.5)}, // truncate-to-scale-1
+ };
+
+ // Allocate output vectors.
+ output = new ArrayList<>(
+ Arrays.asList(
+ new DecimalVector("abs", allocator, aType.getPrecision(), aType.getScale()),
+ new DecimalVector("ceil", allocator, aType.getPrecision(), 0),
+ new DecimalVector("floor", allocator, aType.getPrecision(), 0),
+ new DecimalVector("round", allocator, aType.getPrecision(), 0),
+ new DecimalVector("truncate", allocator, aType.getPrecision(), 0),
+ new DecimalVector("round_to_scale_1", allocator, aType.getPrecision(), 1),
+ new DecimalVector("truncate_to_scale_1", allocator, aType.getPrecision(), 1)
+ )
+ );
+ for (ValueVector v : output) {
+ v.allocateNew();
+ }
+
+ // evaluate expressions.
+ eval.evaluate(batch, output);
+
+ // compare the outputs.
+ for (int idx = 0; idx < output.size(); ++idx) {
+ BigDecimal[] expectedArray = expected[idx];
+ DecimalVector resultVector = (DecimalVector) output.get(idx);
+
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(resultVector.isNull(i));
+ assertTrue("mismatch in result for " +
+ "field " + resultVector.getField().getName() +
+ " for row " + i +
+ " expected " + expectedArray[i] +
+ ", got " + resultVector.getObject(i),
+ expectedArray[i].compareTo(resultVector.getObject(i)) == 0);
+ }
+ }
+ } finally {
+ // free buffers
+ if (batch != null) {
+ releaseRecordBatch(batch);
+ }
+ if (output != null) {
+ releaseValueVectors(output);
+ }
+ eval.close();
+ }
+ }
+
+ @Test
+ public void testCastToDecimal() throws GandivaException {
+ Decimal decimalType = new Decimal(38, 2, 128);
+ Decimal decimalWithScaleOne = new Decimal(38, 1, 128);
+ Field dec = Field.nullable("dec", decimalType);
+ Field int64f = Field.nullable("int64", int64);
+ Field doublef = Field.nullable("float64", float64);
+
+ List<ExpressionTree> exprs = new ArrayList<>(
+ Arrays.asList(
+ TreeBuilder.makeExpression("castDECIMAL",
+ Lists.newArrayList(int64f),
+ Field.nullable("int64_to_dec", decimalType)),
+
+ TreeBuilder.makeExpression("castDECIMAL",
+ Lists.newArrayList(doublef),
+ Field.nullable("float64_to_dec", decimalType)),
+
+ TreeBuilder.makeExpression("castDECIMAL",
+ Lists.newArrayList(dec),
+ Field.nullable("dec_to_dec", decimalWithScaleOne))
+ )
+ );
+
+ Schema schema = new Schema(Lists.newArrayList(int64f, doublef, dec));
+ Projector eval = Projector.make(schema, exprs);
+
+ List<ValueVector> output = null;
+ ArrowRecordBatch batch = null;
+ try {
+ int numRows = 4;
+ String[] aValues = new String[]{"1.23", "1.58", "-1.23", "-1.58"};
+ DecimalVector valuesa = decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale());
+ batch = new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(
+ new ArrowFieldNode(numRows, 0),
+ new ArrowFieldNode(numRows, 0),
+ new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(
+ arrowBufWithAllValid(4),
+ longBuf(new long[]{123, 158, -123, -158}),
+ arrowBufWithAllValid(4),
+ doubleBuf(new double[]{1.23, 1.58, -1.23, -1.58}),
+ valuesa.getValidityBuffer(),
+ valuesa.getDataBuffer())
+ );
+
+ // Allocate output vectors.
+ output = new ArrayList<>(
+ Arrays.asList(
+ new DecimalVector("int64_to_dec", allocator, decimalType.getPrecision(), decimalType.getScale()),
+ new DecimalVector("float64_to_dec", allocator, decimalType.getPrecision(), decimalType.getScale()),
+ new DecimalVector("dec_to_dec", allocator,
+ decimalWithScaleOne.getPrecision(), decimalWithScaleOne.getScale())
+ )
+ );
+ for (ValueVector v : output) {
+ v.allocateNew();
+ }
+
+ // evaluate expressions.
+ eval.evaluate(batch, output);
+
+ // compare the outputs.
+ BigDecimal[][] expected = {
+ { BigDecimal.valueOf(123), BigDecimal.valueOf(158),
+ BigDecimal.valueOf(-123), BigDecimal.valueOf(-158)},
+ { BigDecimal.valueOf(1.23), BigDecimal.valueOf(1.58),
+ BigDecimal.valueOf(-1.23), BigDecimal.valueOf(-1.58)},
+ { BigDecimal.valueOf(1.2), BigDecimal.valueOf(1.6),
+ BigDecimal.valueOf(-1.2), BigDecimal.valueOf(-1.6)}
+ };
+ for (int idx = 0; idx < output.size(); ++idx) {
+ BigDecimal[] expectedArray = expected[idx];
+ DecimalVector resultVector = (DecimalVector) output.get(idx);
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(resultVector.isNull(i));
+ assertTrue("mismatch in result for " +
+ "field " + resultVector.getField().getName() +
+ " for row " + i +
+ " expected " + expectedArray[i] +
+ ", got " + resultVector.getObject(i),
+ expectedArray[i].compareTo(resultVector.getObject(i)) == 0);
+ }
+ }
+ } finally {
+ // free buffers
+ if (batch != null) {
+ releaseRecordBatch(batch);
+ }
+ if (output != null) {
+ releaseValueVectors(output);
+ }
+ eval.close();
+ }
+ }
+
+ @Test
+ public void testCastToLong() throws GandivaException {
+ Decimal decimalType = new Decimal(38, 2, 128);
+ Field dec = Field.nullable("dec", decimalType);
+
+ Schema schema = new Schema(Lists.newArrayList(dec));
+ Projector eval = Projector.make(schema,
+ Lists.newArrayList(
+ TreeBuilder.makeExpression("castBIGINT",
+ Lists.newArrayList(dec),
+ Field.nullable("dec_to_int64", int64)
+ )
+ )
+ );
+
+ List<ValueVector> output = null;
+ ArrowRecordBatch batch = null;
+ try {
+ int numRows = 5;
+ String[] aValues = new String[]{"1.23", "1.50", "98765.78", "-1.23", "-1.58"};
+ DecimalVector valuesa = decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale());
+ batch = new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(
+ new ArrowFieldNode(numRows, 0)
+ ),
+ Lists.newArrayList(
+ valuesa.getValidityBuffer(),
+ valuesa.getDataBuffer()
+ )
+ );
+
+ // Allocate output vectors.
+ BigIntVector resultVector = new BigIntVector("dec_to_int64", allocator);
+ resultVector.allocateNew();
+ output = new ArrayList<>(Arrays.asList(resultVector));
+
+ // evaluate expressions.
+ eval.evaluate(batch, output);
+
+ // compare the outputs.
+ long[] expected = {1, 2, 98766, -1, -2};
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(resultVector.isNull(i));
+ assertEquals(expected[i], resultVector.get(i));
+ }
+ } finally {
+ // free buffers
+ if (batch != null) {
+ releaseRecordBatch(batch);
+ }
+ if (output != null) {
+ releaseValueVectors(output);
+ }
+ eval.close();
+ }
+ }
+
+ @Test
+ public void testCastToDouble() throws GandivaException {
+ Decimal decimalType = new Decimal(38, 2, 128);
+ Field dec = Field.nullable("dec", decimalType);
+
+ Schema schema = new Schema(Lists.newArrayList(dec));
+ Projector eval = Projector.make(schema,
+ Lists.newArrayList(
+ TreeBuilder.makeExpression("castFLOAT8",
+ Lists.newArrayList(dec),
+ Field.nullable("dec_to_float64", float64)
+ )
+ )
+ );
+
+ List<ValueVector> output = null;
+ ArrowRecordBatch batch = null;
+ try {
+ int numRows = 4;
+ String[] aValues = new String[]{"1.23", "1.58", "-1.23", "-1.58"};
+ DecimalVector valuesa = decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale());
+ batch = new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(
+ new ArrowFieldNode(numRows, 0)
+ ),
+ Lists.newArrayList(
+ valuesa.getValidityBuffer(),
+ valuesa.getDataBuffer()
+ )
+ );
+
+ // Allocate output vectors.
+ Float8Vector resultVector = new Float8Vector("dec_to_float64", allocator);
+ resultVector.allocateNew();
+ output = new ArrayList<>(Arrays.asList(resultVector));
+
+ // evaluate expressions.
+ eval.evaluate(batch, output);
+
+ // compare the outputs.
+ double[] expected = {1.23, 1.58, -1.23, -1.58};
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(resultVector.isNull(i));
+ assertEquals(expected[i], resultVector.get(i), 0);
+ }
+ } finally {
+ // free buffers
+ if (batch != null) {
+ releaseRecordBatch(batch);
+ }
+ if (output != null) {
+ releaseValueVectors(output);
+ }
+ eval.close();
+ }
+ }
+
+ @Test
+ public void testCastToString() throws GandivaException {
+ Decimal decimalType = new Decimal(38, 2, 128);
+ Field dec = Field.nullable("dec", decimalType);
+ Field str = Field.nullable("str", new ArrowType.Utf8());
+ TreeNode field = TreeBuilder.makeField(dec);
+ TreeNode literal = TreeBuilder.makeLiteral(5L);
+ List<TreeNode> args = Lists.newArrayList(field, literal);
+ TreeNode cast = TreeBuilder.makeFunction("castVARCHAR", args, new ArrowType.Utf8());
+ TreeNode root = TreeBuilder.makeFunction("equal",
+ Lists.newArrayList(cast, TreeBuilder.makeField(str)), new ArrowType.Bool());
+ ExpressionTree tree = TreeBuilder.makeExpression(root, Field.nullable("are_equal", new ArrowType.Bool()));
+
+ Schema schema = new Schema(Lists.newArrayList(dec, str));
+ Projector eval = Projector.make(schema, Lists.newArrayList(tree)
+ );
+
+ List<ValueVector> output = null;
+ ArrowRecordBatch batch = null;
+ try {
+ int numRows = 4;
+ String[] aValues = new String[]{"10.51", "100.23", "-1000.23", "-0000.10"};
+ String[] expected = {"10.51", "100.2", "-1000", "-0.10"};
+ DecimalVector valuesa = decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale());
+ VarCharVector result = varcharVector(expected);
+ batch = new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(
+ new ArrowFieldNode(numRows, 0)
+ ),
+ Lists.newArrayList(
+ valuesa.getValidityBuffer(),
+ valuesa.getDataBuffer(),
+ result.getValidityBuffer(),
+ result.getOffsetBuffer(),
+ result.getDataBuffer()
+ )
+ );
+
+ BitVector resultVector = new BitVector("res", allocator);
+ resultVector.allocateNew();
+ output = new ArrayList<>(Arrays.asList(resultVector));
+
+ // evaluate expressions.
+ eval.evaluate(batch, output);
+
+ // compare the outputs.
+ for (int i = 0; i < numRows; i++) {
+ assertTrue(resultVector.getObject(i).booleanValue());
+ }
+ } finally {
+ // free buffers
+ if (batch != null) {
+ releaseRecordBatch(batch);
+ }
+ if (output != null) {
+ releaseValueVectors(output);
+ }
+ eval.close();
+ }
+ }
+
+ @Test
+ public void testCastStringToDecimal() throws GandivaException {
+ Decimal decimalType = new Decimal(4, 2, 128);
+ Field dec = Field.nullable("dec", decimalType);
+
+ Field str = Field.nullable("str", new ArrowType.Utf8());
+ TreeNode field = TreeBuilder.makeField(str);
+ List<TreeNode> args = Lists.newArrayList(field);
+ TreeNode cast = TreeBuilder.makeFunction("castDECIMAL", args, decimalType);
+ ExpressionTree tree = TreeBuilder.makeExpression(cast, Field.nullable("dec_str", decimalType));
+
+ Schema schema = new Schema(Lists.newArrayList(str));
+ Projector eval = Projector.make(schema, Lists.newArrayList(tree)
+ );
+
+ List<ValueVector> output = null;
+ ArrowRecordBatch batch = null;
+ try {
+ int numRows = 4;
+ String[] aValues = new String[]{"10.5134", "-0.1", "10.516", "-1000"};
+ VarCharVector valuesa = varcharVector(aValues);
+ batch = new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(
+ new ArrowFieldNode(numRows, 0)
+ ),
+ Lists.newArrayList(
+ valuesa.getValidityBuffer(),
+ valuesa.getOffsetBuffer(),
+ valuesa.getDataBuffer()
+ )
+ );
+
+ DecimalVector resultVector = new DecimalVector("res", allocator,
+ decimalType.getPrecision(), decimalType.getScale());
+ resultVector.allocateNew();
+ output = new ArrayList<>(Arrays.asList(resultVector));
+
+ BigDecimal[] expected = {BigDecimal.valueOf(10.51), BigDecimal.valueOf(-0.10),
+ BigDecimal.valueOf(10.52), BigDecimal.valueOf(0.00)};
+ // evaluate expressions.
+ eval.evaluate(batch, output);
+
+ // compare the outputs.
+ for (int i = 0; i < numRows; i++) {
+ assertTrue("mismatch in result for " +
+ "field " + resultVector.getField().getName() +
+ " for row " + i +
+ " expected " + expected[i] +
+ ", got " + resultVector.getObject(i), expected[i].compareTo(resultVector.getObject(i)) == 0);
+ }
+ } finally {
+ // free buffers
+ if (batch != null) {
+ releaseRecordBatch(batch);
+ }
+ if (output != null) {
+ releaseValueVectors(output);
+ }
+ eval.close();
+ }
+ }
+
+ @Test
+ public void testInvalidDecimal() throws GandivaException {
+ exception.expect(IllegalArgumentException.class);
+ exception.expectMessage("Gandiva only supports decimals of upto 38 precision. Input precision" +
+ " : 0");
+ Decimal decimalType = new Decimal(0, 0, 128);
+ Field int64f = Field.nullable("int64", int64);
+
+ Schema schema = new Schema(Lists.newArrayList(int64f));
+ Projector eval = Projector.make(schema,
+ Lists.newArrayList(
+ TreeBuilder.makeExpression("castDECIMAL",
+ Lists.newArrayList(int64f),
+ Field.nullable("invalid_dec", decimalType)
+ )
+ )
+ );
+ }
+
+ @Test
+ public void testInvalidDecimalGt38() throws GandivaException {
+ exception.expect(IllegalArgumentException.class);
+ exception.expectMessage("Gandiva only supports decimals of upto 38 precision. Input precision" +
+ " : 42");
+ Decimal decimalType = new Decimal(42, 0, 128);
+ Field int64f = Field.nullable("int64", int64);
+
+ Schema schema = new Schema(Lists.newArrayList(int64f));
+ Projector eval = Projector.make(schema,
+ Lists.newArrayList(
+ TreeBuilder.makeExpression("castDECIMAL",
+ Lists.newArrayList(int64f),
+ Field.nullable("invalid_dec", decimalType)
+ )
+ )
+ );
+ }
+}
+
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
new file mode 100644
index 000000000..03c9377b0
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java
@@ -0,0 +1,2470 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.math.BigDecimal;
+import java.nio.charset.Charset;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.IntStream;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.expression.ExpressionTree;
+import org.apache.arrow.gandiva.expression.TreeBuilder;
+import org.apache.arrow.gandiva.expression.TreeNode;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public class ProjectorTest extends BaseEvaluatorTest {
+
+ private Charset utf8Charset = Charset.forName("UTF-8");
+ private Charset utf16Charset = Charset.forName("UTF-16");
+
+ @Rule
+ public ExpectedException thrown = ExpectedException.none();
+
+ List<ArrowBuf> varBufs(String[] strings, Charset charset) {
+ ArrowBuf offsetsBuffer = allocator.buffer((strings.length + 1) * 4);
+
+ long dataBufferSize = 0L;
+ for (String string : strings) {
+ dataBufferSize += string.getBytes(charset).length;
+ }
+
+ ArrowBuf dataBuffer = allocator.buffer(dataBufferSize);
+
+ int startOffset = 0;
+ for (int i = 0; i < strings.length; i++) {
+ offsetsBuffer.writeInt(startOffset);
+
+ final byte[] bytes = strings[i].getBytes(charset);
+ dataBuffer = dataBuffer.reallocIfNeeded(dataBuffer.writerIndex() + bytes.length);
+ dataBuffer.setBytes(startOffset, bytes, 0, bytes.length);
+ startOffset += bytes.length;
+ }
+ offsetsBuffer.writeInt(startOffset); // offset for the last element
+
+ return Arrays.asList(offsetsBuffer, dataBuffer);
+ }
+
+ List<ArrowBuf> stringBufs(String[] strings) {
+ return varBufs(strings, utf8Charset);
+ }
+
+ List<ArrowBuf> binaryBufs(String[] strings) {
+ return varBufs(strings, utf16Charset);
+ }
+
+ private void testMakeProjectorParallel(ConfigurationBuilder.ConfigOptions configOptions) throws InterruptedException {
+ List<Schema> schemas = Lists.newArrayList();
+ Field a = Field.nullable("a", int64);
+ Field b = Field.nullable("b", int64);
+ IntStream.range(0, 1000)
+ .forEach(
+ i -> {
+ Field c = Field.nullable("" + i, int64);
+ List<Field> cols = Lists.newArrayList(a, b, c);
+ schemas.add(new Schema(cols));
+ });
+
+ TreeNode aNode = TreeBuilder.makeField(a);
+ TreeNode bNode = TreeBuilder.makeField(b);
+ List<TreeNode> args = Lists.newArrayList(aNode, bNode);
+
+ TreeNode cond = TreeBuilder.makeFunction("greater_than", args, boolType);
+ TreeNode ifNode = TreeBuilder.makeIf(cond, aNode, bNode, int64);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(ifNode, Field.nullable("c", int64));
+ List<ExpressionTree> exprs = Lists.newArrayList(expr);
+
+ // build projectors in parallel choosing schema at random
+ // this should hit the same cache entry thus exposing
+ // any threading issues.
+ ExecutorService executors = Executors.newFixedThreadPool(16);
+
+ IntStream.range(0, 1000)
+ .forEach(
+ i -> {
+ executors.submit(
+ () -> {
+ try {
+ Projector evaluator = configOptions == null ?
+ Projector.make(schemas.get((int) (Math.random() * 100)), exprs) :
+ Projector.make(schemas.get((int) (Math.random() * 100)), exprs, configOptions);
+ evaluator.close();
+ } catch (GandivaException e) {
+ e.printStackTrace();
+ }
+ });
+ });
+ executors.shutdown();
+ executors.awaitTermination(100, java.util.concurrent.TimeUnit.SECONDS);
+ }
+
+ @Test
+ public void testMakeProjectorParallel() throws Exception {
+ testMakeProjectorParallel(null);
+ testMakeProjectorParallel(new ConfigurationBuilder.ConfigOptions().withTargetCPU(false));
+ testMakeProjectorParallel(new ConfigurationBuilder.ConfigOptions().withTargetCPU(false).withOptimize(false));
+ }
+
+ // Will be fixed by https://issues.apache.org/jira/browse/ARROW-4371
+ @Ignore
+ @Test
+ public void testMakeProjector() throws GandivaException {
+ Field a = Field.nullable("a", int64);
+ Field b = Field.nullable("b", int64);
+ TreeNode aNode = TreeBuilder.makeField(a);
+ TreeNode bNode = TreeBuilder.makeField(b);
+ List<TreeNode> args = Lists.newArrayList(aNode, bNode);
+
+ List<Field> cols = Lists.newArrayList(a, b);
+ Schema schema = new Schema(cols);
+
+ TreeNode cond = TreeBuilder.makeFunction("greater_than", args, boolType);
+ TreeNode ifNode = TreeBuilder.makeIf(cond, aNode, bNode, int64);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(ifNode, Field.nullable("c", int64));
+ List<ExpressionTree> exprs = Lists.newArrayList(expr);
+
+ long startTime = System.currentTimeMillis();
+ Projector evaluator1 = Projector.make(schema, exprs);
+ System.out.println(
+ "Projector build: iteration 1 took " + (System.currentTimeMillis() - startTime) + " ms");
+ startTime = System.currentTimeMillis();
+ Projector evaluator2 = Projector.make(schema, exprs);
+ System.out.println(
+ "Projector build: iteration 2 took " + (System.currentTimeMillis() - startTime) + " ms");
+ startTime = System.currentTimeMillis();
+ Projector evaluator3 = Projector.make(schema, exprs);
+ long timeToMakeProjector = (System.currentTimeMillis() - startTime);
+ // should be getting the projector from the cache;
+ // giving 5ms for varying system load.
+ Assert.assertTrue(timeToMakeProjector < 5L);
+
+ evaluator1.close();
+ evaluator2.close();
+ evaluator3.close();
+ }
+
+ @Test
+ public void testMakeProjectorValidationError() throws InterruptedException {
+
+ Field a = Field.nullable("a", int64);
+ TreeNode aNode = TreeBuilder.makeField(a);
+ List<TreeNode> args = Lists.newArrayList(aNode);
+
+ List<Field> cols = Lists.newArrayList(a);
+ Schema schema = new Schema(cols);
+
+ TreeNode cond = TreeBuilder.makeFunction("non_existent_fn", args, boolType);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(cond, Field.nullable("c", int64));
+ List<ExpressionTree> exprs = Lists.newArrayList(expr);
+
+ boolean exceptionThrown = false;
+ try {
+ Projector evaluator1 = Projector.make(schema, exprs);
+ } catch (GandivaException e) {
+ exceptionThrown = true;
+ }
+
+ Assert.assertTrue(exceptionThrown);
+
+ // allow GC to collect any temp resources.
+ Thread.sleep(1000);
+
+ // try again to ensure no temporary resources.
+ exceptionThrown = false;
+ try {
+ Projector evaluator1 = Projector.make(schema, exprs);
+ } catch (GandivaException e) {
+ exceptionThrown = true;
+ }
+
+ Assert.assertTrue(exceptionThrown);
+ }
+
+ @Test
+ public void testEvaluate() throws GandivaException, Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Field retType = Field.nullable("c", int32);
+ ExpressionTree root = TreeBuilder.makeExpression("add", args, retType);
+
+ List<ExpressionTree> exprs = Lists.newArrayList(root);
+
+ Schema schema = new Schema(args);
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // second half is "undefined"
+ int[] aValues = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] bValues = new int[]{16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+
+ ArrowBuf validitya = buf(validity);
+ ArrowBuf valuesa = intBuf(aValues);
+ ArrowBuf validityb = buf(validity);
+ ArrowBuf valuesb = intBuf(bValues);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 8), new ArrowFieldNode(numRows, 8)),
+ Lists.newArrayList(validitya, valuesa, validityb, valuesb));
+
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(intVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < 8; i++) {
+ assertFalse(intVector.isNull(i));
+ assertEquals(17, intVector.get(i));
+ }
+ for (int i = 8; i < 16; i++) {
+ assertTrue(intVector.isNull(i));
+ }
+
+ // free buffers
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testEvaluateDivZero() throws GandivaException, Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Field retType = Field.nullable("c", int32);
+ ExpressionTree root = TreeBuilder.makeExpression("divide", args, retType);
+
+ List<ExpressionTree> exprs = Lists.newArrayList(root);
+
+ Schema schema = new Schema(args);
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 2;
+ byte[] validity = new byte[]{(byte) 255};
+ // second half is "undefined"
+ int[] aValues = new int[]{2, 2};
+ int[] bValues = new int[]{1, 0};
+
+ ArrowBuf validitya = buf(validity);
+ ArrowBuf valuesa = intBuf(aValues);
+ ArrowBuf validityb = buf(validity);
+ ArrowBuf valuesb = intBuf(bValues);
+ ArrowRecordBatch batch = new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(validitya, valuesa, validityb, valuesb));
+
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(intVector);
+ boolean exceptionThrown = false;
+ try {
+ eval.evaluate(batch, output);
+ } catch (GandivaException e) {
+ Assert.assertTrue(e.getMessage().contains("divide by zero"));
+ exceptionThrown = true;
+ }
+ Assert.assertTrue(exceptionThrown);
+
+ // free buffers
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testDivZeroParallel() throws GandivaException, InterruptedException {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ Field c = Field.nullable("c", int32);
+ List<Field> cols = Lists.newArrayList(a, b);
+ Schema s = new Schema(cols);
+
+ List<Field> args = Lists.newArrayList(a, b);
+
+ ExpressionTree expr = TreeBuilder.makeExpression("divide", args, c);
+ List<ExpressionTree> exprs = Lists.newArrayList(expr);
+
+ ExecutorService executors = Executors.newFixedThreadPool(16);
+
+ AtomicInteger errorCount = new AtomicInteger(0);
+ AtomicInteger errorCountExp = new AtomicInteger(0);
+ // pre-build the projector so that same projector is used for all executions.
+ Projector test = Projector.make(s, exprs);
+
+ IntStream.range(0, 1000).forEach(i -> {
+ executors.submit(() -> {
+ try {
+ Projector evaluator = Projector.make(s, exprs);
+ int numRows = 2;
+ byte[] validity = new byte[]{(byte) 255};
+ int[] aValues = new int[]{2, 2};
+ int[] bValues;
+ if (i % 2 == 0) {
+ errorCountExp.incrementAndGet();
+ bValues = new int[]{1, 0};
+ } else {
+ bValues = new int[]{1, 1};
+ }
+
+ ArrowBuf validitya = buf(validity);
+ ArrowBuf valuesa = intBuf(aValues);
+ ArrowBuf validityb = buf(validity);
+ ArrowBuf valuesb = intBuf(bValues);
+ ArrowRecordBatch batch = new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows,
+ 0)),
+ Lists.newArrayList(validitya, valuesa, validityb, valuesb));
+
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(intVector);
+ try {
+ evaluator.evaluate(batch, output);
+ } catch (GandivaException e) {
+ errorCount.incrementAndGet();
+ }
+ // free buffers
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ evaluator.close();
+ } catch (GandivaException ignore) {
+ }
+ });
+ });
+ executors.shutdown();
+ executors.awaitTermination(100, java.util.concurrent.TimeUnit.SECONDS);
+ test.close();
+ Assert.assertEquals(errorCountExp.intValue(), errorCount.intValue());
+ }
+
+ @Test
+ public void testAdd3() throws GandivaException, Exception {
+ Field x = Field.nullable("x", int32);
+ Field n2x = Field.nullable("n2x", int32);
+ Field n3x = Field.nullable("n3x", int32);
+
+ List<TreeNode> args = new ArrayList<TreeNode>();
+
+ // x + n2x + n3x
+ TreeNode add1 =
+ TreeBuilder.makeFunction(
+ "add", Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeField(n2x)), int32);
+ TreeNode add =
+ TreeBuilder.makeFunction(
+ "add", Lists.newArrayList(add1, TreeBuilder.makeField(n3x)), int32);
+ ExpressionTree expr = TreeBuilder.makeExpression(add, x);
+
+ List<Field> cols = Lists.newArrayList(x, n2x, n3x);
+ Schema schema = new Schema(cols);
+
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // second half is "undefined"
+ int[] xValues = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] n2xValues = new int[]{16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+ int[] n3xValues = new int[]{1, 2, 3, 4, 4, 3, 2, 1, 5, 6, 7, 8, 8, 7, 6, 5};
+
+ int[] expected = new int[]{18, 19, 20, 21, 21, 20, 19, 18, 18, 19, 20, 21, 21, 20, 19, 18};
+
+ ArrowBuf xValidity = buf(validity);
+ ArrowBuf xData = intBuf(xValues);
+ ArrowBuf n2xValidity = buf(validity);
+ ArrowBuf n2xData = intBuf(n2xValues);
+ ArrowBuf n3xValidity = buf(validity);
+ ArrowBuf n3xData = intBuf(n3xValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 8);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode, fieldNode),
+ Lists.newArrayList(xValidity, xData, n2xValidity, n2xData, n3xValidity, n3xData));
+
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(intVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < 8; i++) {
+ assertFalse(intVector.isNull(i));
+ assertEquals(expected[i], intVector.get(i));
+ }
+ for (int i = 8; i < 16; i++) {
+ assertTrue(intVector.isNull(i));
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testStringFields() throws GandivaException {
+ /*
+ * when x < "hello" then octet_length(x) + a
+ * else octet_length(x) + b
+ */
+
+ Field x = Field.nullable("x", new ArrowType.Utf8());
+ Field a = Field.nullable("a", new ArrowType.Int(32, true));
+ Field b = Field.nullable("b", new ArrowType.Int(32, true));
+
+ ArrowType retType = new ArrowType.Int(32, true);
+
+ TreeNode cond =
+ TreeBuilder.makeFunction(
+ "less_than",
+ Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeStringLiteral("hello")),
+ boolType);
+ TreeNode octetLenFuncNode =
+ TreeBuilder.makeFunction(
+ "octet_length", Lists.newArrayList(TreeBuilder.makeField(x)), retType);
+ TreeNode octetLenPlusANode =
+ TreeBuilder.makeFunction(
+ "add", Lists.newArrayList(TreeBuilder.makeField(a), octetLenFuncNode), retType);
+ TreeNode octetLenPlusBNode =
+ TreeBuilder.makeFunction(
+ "add", Lists.newArrayList(TreeBuilder.makeField(b), octetLenFuncNode), retType);
+
+ TreeNode ifHello = TreeBuilder.makeIf(cond, octetLenPlusANode, octetLenPlusBNode, retType);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(ifHello, Field.nullable("res", retType));
+ Schema schema = new Schema(Lists.newArrayList(a, x, b));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 5;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // "A função" means "The function" in portugese
+ String[] valuesX = new String[]{"hell", "abc", "hellox", "ijk", "A função"};
+ int[] valuesA = new int[]{10, 20, 30, 40, 50};
+ int[] valuesB = new int[]{110, 120, 130, 140, 150};
+ int[] expected = new int[]{14, 23, 136, 143, 60};
+
+ ArrowBuf validityX = buf(validity);
+ List<ArrowBuf> dataBufsX = stringBufs(valuesX);
+ ArrowBuf validityA = buf(validity);
+ ArrowBuf dataA = intBuf(valuesA);
+ ArrowBuf validityB = buf(validity);
+ ArrowBuf dataB = intBuf(valuesB);
+
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(
+ validityA, dataA, validityX, dataBufsX.get(0), dataBufsX.get(1), validityB, dataB));
+
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(intVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(intVector.isNull(i));
+ assertEquals(expected[i], intVector.get(i));
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testStringOutput() throws GandivaException {
+ /*
+ * if (x >= 0) "hi" else "bye"
+ */
+
+ Field x = Field.nullable("x", new ArrowType.Int(32, true));
+
+ ArrowType retType = new ArrowType.Utf8();
+
+ TreeNode ifHiBye = TreeBuilder.makeIf(
+ TreeBuilder.makeFunction(
+ "greater_than_or_equal_to",
+ Lists.newArrayList(
+ TreeBuilder.makeField(x),
+ TreeBuilder.makeLiteral(0)
+ ),
+ boolType),
+ TreeBuilder.makeStringLiteral("hi"),
+ TreeBuilder.makeStringLiteral("bye"),
+ retType);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(ifHiBye, Field.nullable("res", retType));
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ // fill up input record batch
+ int numRows = 4;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ int[] xValues = new int[]{10, -10, 20, -20};
+ String[] expected = new String[]{"hi", "bye", "hi", "bye"};
+ ArrowBuf validityX = buf(validity);
+ ArrowBuf dataX = intBuf(xValues);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList( validityX, dataX));
+
+ // allocate data for output vector.
+ VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+ outVector.allocateNew(64, numRows);
+
+
+ // evaluate expression
+ List<ValueVector> output = new ArrayList<>();
+ output.add(outVector);
+ eval.evaluate(batch, output);
+
+ // match expected output.
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(outVector.isNull(i));
+ assertEquals(expected[i], new String(outVector.get(i)));
+ }
+
+ // test with insufficient data buffer.
+ try {
+ outVector.allocateNew(4, numRows);
+ eval.evaluate(batch, output);
+ } finally {
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+ }
+
+ @Test
+ public void testRegex() throws GandivaException {
+ /*
+ * like "%map%"
+ */
+
+ Field x = Field.nullable("x", new ArrowType.Utf8());
+
+ TreeNode cond =
+ TreeBuilder.makeFunction(
+ "like",
+ Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeStringLiteral("%map%")),
+ boolType);
+ ExpressionTree expr = TreeBuilder.makeExpression(cond, Field.nullable("res", boolType));
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 5;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ String[] valuesX = new String[]{"mapD", "maps", "google maps", "map", "MapR"};
+ boolean[] expected = new boolean[]{true, true, true, true, false};
+
+ ArrowBuf validityX = buf(validity);
+ List<ArrowBuf> dataBufsX = stringBufs(valuesX);
+
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1)));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(bitVector.isNull(i));
+ assertEquals(expected[i], bitVector.getObject(i).booleanValue());
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testRegexpReplace() throws GandivaException {
+
+ Field x = Field.nullable("x", new ArrowType.Utf8());
+ Field replaceString = Field.nullable("replaceString", new ArrowType.Utf8());
+
+ Field retType = Field.nullable("c", new ArrowType.Utf8());
+
+ TreeNode cond =
+ TreeBuilder.makeFunction(
+ "regexp_replace",
+ Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeStringLiteral("ana"),
+ TreeBuilder.makeField(replaceString)),
+ new ArrowType.Utf8());
+ ExpressionTree expr = TreeBuilder.makeExpression(cond, retType);
+ Schema schema = new Schema(Lists.newArrayList(x, replaceString));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 5;
+ byte[] validity = new byte[]{(byte) 15, 0};
+ String[] valuesX = new String[]{"banana", "bananaana", "bananana", "anaana", "anaana"};
+ String[] valuesReplace = new String[]{"ue", "", "", "c", ""};
+ String[] expected = new String[]{"buena", "bna", "bn", "cc", null};
+
+ ArrowBuf validityX = buf(validity);
+ ArrowBuf validityReplace = buf(validity);
+ List<ArrowBuf> dataBufsX = stringBufs(valuesX);
+ List<ArrowBuf> dataBufsReplace = stringBufs(valuesReplace);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1), validityReplace,
+ dataBufsReplace.get(0), dataBufsReplace.get(1)));
+
+ // allocate data for output vector.
+ VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+ outVector.allocateNew(numRows * 15, numRows);
+
+ // evaluate expression
+ List<ValueVector> output = new ArrayList<>();
+ output.add(outVector);
+ eval.evaluate(batch, output);
+ eval.close();
+
+ // match expected output.
+ for (int i = 0; i < numRows - 1; i++) {
+ assertFalse("Expect none value equals null", outVector.isNull(i));
+ assertEquals(expected[i], new String(outVector.get(i)));
+ }
+
+ assertTrue("Last value must be null", outVector.isNull(numRows - 1));
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test
+ public void testRand() throws GandivaException {
+
+ TreeNode randWithSeed =
+ TreeBuilder.makeFunction(
+ "rand",
+ Lists.newArrayList(TreeBuilder.makeLiteral(12)),
+ float64);
+ TreeNode rand =
+ TreeBuilder.makeFunction(
+ "rand",
+ Lists.newArrayList(),
+ float64);
+ ExpressionTree exprWithSeed = TreeBuilder.makeExpression(randWithSeed, Field.nullable("res", float64));
+ ExpressionTree expr = TreeBuilder.makeExpression(rand, Field.nullable("res2", float64));
+ Field x = Field.nullable("x", new ArrowType.Utf8());
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector evalWithSeed = Projector.make(schema, Lists.newArrayList(exprWithSeed));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255, 0};
+ String[] valuesX = new String[] {"mapD", "maps", "google maps", "map", "MapR"};
+ double[] expected = new double[] {0.1597116001879662D, 0.7347813877263527D, 0.6069965050584282D,
+ 0.7240285696335824D, 0.09975540272957834D};
+
+ ArrowBuf validityX = buf(validity);
+ List<ArrowBuf> dataBufsX = stringBufs(valuesX);
+
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1)));
+
+ Float8Vector float8Vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator);
+ float8Vector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(float8Vector);
+ evalWithSeed.evaluate(batch, output);
+
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(float8Vector.isNull(i));
+ assertEquals(expected[i], float8Vector.getObject(i), 0.000000001);
+ }
+
+ eval.evaluate(batch, output); // without seed
+ assertNotEquals(float8Vector.getObject(0), float8Vector.getObject(1), 0.000000001);
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ evalWithSeed.close();
+ }
+
+ @Test
+ public void testBinaryFields() throws GandivaException {
+ Field a = Field.nullable("a", new ArrowType.Binary());
+ Field b = Field.nullable("b", new ArrowType.Binary());
+ List<Field> args = Lists.newArrayList(a, b);
+
+ ArrowType retType = new ArrowType.Bool();
+ ExpressionTree expr = TreeBuilder.makeExpression("equal", args, Field.nullable("res", retType));
+
+ Schema schema = new Schema(Lists.newArrayList(args));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 5;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ String[] valuesA = new String[]{"a", "aa", "aaa", "aaaa", "A função"};
+ String[] valuesB = new String[]{"a", "bb", "aaa", "bbbbb", "A função"};
+ boolean[] expected = new boolean[]{true, false, true, false, true};
+
+ ArrowBuf validitya = buf(validity);
+ ArrowBuf validityb = buf(validity);
+ List<ArrowBuf> inBufsA = binaryBufs(valuesA);
+ List<ArrowBuf> inBufsB = binaryBufs(valuesB);
+
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 8), new ArrowFieldNode(numRows, 8)),
+ Lists.newArrayList(
+ validitya,
+ inBufsA.get(0),
+ inBufsA.get(1),
+ validityb,
+ inBufsB.get(0),
+ inBufsB.get(1)));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(bitVector.isNull(i));
+ assertEquals(expected[i], bitVector.getObject(i).booleanValue());
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ private TreeNode makeLongLessThanCond(TreeNode arg, long value) {
+ return TreeBuilder.makeFunction(
+ "less_than", Lists.newArrayList(arg, TreeBuilder.makeLiteral(value)), boolType);
+ }
+
+ private TreeNode makeLongGreaterThanCond(TreeNode arg, long value) {
+ return TreeBuilder.makeFunction(
+ "greater_than", Lists.newArrayList(arg, TreeBuilder.makeLiteral(value)), boolType);
+ }
+
+ private TreeNode ifLongLessThanElse(
+ TreeNode arg, long value, long thenValue, TreeNode elseNode, ArrowType type) {
+ return TreeBuilder.makeIf(
+ makeLongLessThanCond(arg, value), TreeBuilder.makeLiteral(thenValue), elseNode, type);
+ }
+
+ @Test
+ public void testIf() throws GandivaException, Exception {
+ /*
+ * when x < 10 then 0
+ * when x < 20 then 1
+ * when x < 30 then 2
+ * when x < 40 then 3
+ * when x < 50 then 4
+ * when x < 60 then 5
+ * when x < 70 then 6
+ * when x < 80 then 7
+ * when x < 90 then 8
+ * when x < 100 then 9
+ * else 10
+ */
+ Field x = Field.nullable("x", int64);
+ TreeNode xNode = TreeBuilder.makeField(x);
+
+ // if (x < 100) then 9 else 10
+ TreeNode ifLess100 = ifLongLessThanElse(xNode, 100L, 9L, TreeBuilder.makeLiteral(10L), int64);
+ // if (x < 90) then 8 else ifLess100
+ TreeNode ifLess90 = ifLongLessThanElse(xNode, 90L, 8L, ifLess100, int64);
+ // if (x < 80) then 7 else ifLess90
+ TreeNode ifLess80 = ifLongLessThanElse(xNode, 80L, 7L, ifLess90, int64);
+ // if (x < 70) then 6 else ifLess80
+ TreeNode ifLess70 = ifLongLessThanElse(xNode, 70L, 6L, ifLess80, int64);
+ // if (x < 60) then 5 else ifLess70
+ TreeNode ifLess60 = ifLongLessThanElse(xNode, 60L, 5L, ifLess70, int64);
+ // if (x < 50) then 4 else ifLess60
+ TreeNode ifLess50 = ifLongLessThanElse(xNode, 50L, 4L, ifLess60, int64);
+ // if (x < 40) then 3 else ifLess50
+ TreeNode ifLess40 = ifLongLessThanElse(xNode, 40L, 3L, ifLess50, int64);
+ // if (x < 30) then 2 else ifLess40
+ TreeNode ifLess30 = ifLongLessThanElse(xNode, 30L, 2L, ifLess40, int64);
+ // if (x < 20) then 1 else ifLess30
+ TreeNode ifLess20 = ifLongLessThanElse(xNode, 20L, 1L, ifLess30, int64);
+ // if (x < 10) then 0 else ifLess20
+ TreeNode ifLess10 = ifLongLessThanElse(xNode, 10L, 0L, ifLess20, int64);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(ifLess10, x);
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, (byte) 255};
+ long[] xValues = new long[]{9, 15, 21, 32, 43, 54, 65, 76, 87, 98, 109, 200, -10, 60, 77, 80};
+ long[] expected = new long[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 0, 6, 7, 8};
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf xData = longBuf(xValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData));
+
+ BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ bigIntVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bigIntVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(bigIntVector.isNull(i));
+ assertEquals(expected[i], bigIntVector.get(i));
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testAnd() throws GandivaException, Exception {
+ /*
+ * x > 10 AND x < 20
+ */
+ ArrowType int64 = new ArrowType.Int(64, true);
+
+ Field x = Field.nullable("x", int64);
+ TreeNode xNode = TreeBuilder.makeField(x);
+ TreeNode gt10 = makeLongGreaterThanCond(xNode, 10);
+ TreeNode lt20 = makeLongLessThanCond(xNode, 20);
+ TreeNode and = TreeBuilder.makeAnd(Lists.newArrayList(gt10, lt20));
+
+ Field res = Field.nullable("res", boolType);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(and, res);
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 4;
+ byte[] validity = new byte[]{(byte) 255};
+ long[] xValues = new long[]{9, 15, 17, 25};
+ boolean[] expected = new boolean[]{false, true, true, false};
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf xData = longBuf(xValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(bitVector.isNull(i));
+ assertEquals(expected[i], bitVector.getObject(i).booleanValue());
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testOr() throws GandivaException, Exception {
+ /*
+ * x > 10 OR x < 5
+ */
+ ArrowType int64 = new ArrowType.Int(64, true);
+
+ Field x = Field.nullable("x", int64);
+ TreeNode xNode = TreeBuilder.makeField(x);
+ TreeNode gt10 = makeLongGreaterThanCond(xNode, 10);
+ TreeNode lt5 = makeLongLessThanCond(xNode, 5);
+ TreeNode or = TreeBuilder.makeOr(Lists.newArrayList(gt10, lt5));
+
+ Field res = Field.nullable("res", boolType);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(or, res);
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 4;
+ byte[] validity = new byte[]{(byte) 255};
+ long[] xValues = new long[]{4, 9, 15, 17};
+ boolean[] expected = new boolean[]{true, false, true, true};
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf xData = longBuf(xValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < numRows; i++) {
+ assertFalse(bitVector.isNull(i));
+ assertEquals(expected[i], bitVector.getObject(i).booleanValue());
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testNull() throws GandivaException, Exception {
+ /*
+ * when x < 10 then 1
+ * else null
+ */
+ ArrowType int64 = new ArrowType.Int(64, true);
+
+ Field x = Field.nullable("x", int64);
+ TreeNode xNode = TreeBuilder.makeField(x);
+
+ // if (x < 10) then 1 else null
+ TreeNode ifLess10 = ifLongLessThanElse(xNode, 10L, 1L, TreeBuilder.makeNull(int64), int64);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(ifLess10, x);
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 2;
+ byte[] validity = new byte[]{(byte) 255};
+ long[] xValues = new long[]{5, 32};
+ long[] expected = new long[]{1, 0};
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf xData = longBuf(xValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData));
+
+ BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ bigIntVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bigIntVector);
+ eval.evaluate(batch, output);
+
+ // first element should be 1
+ assertFalse(bigIntVector.isNull(0));
+ assertEquals(expected[0], bigIntVector.get(0));
+
+ // second element should be null
+ assertTrue(bigIntVector.isNull(1));
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testTimeNull() throws GandivaException, Exception {
+
+ ArrowType time64 = new ArrowType.Time(TimeUnit.MICROSECOND, 64);
+
+ Field x = Field.nullable("x", time64);
+ TreeNode xNode = TreeBuilder.makeNull(time64);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(xNode, x);
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 2;
+ byte[] validity = new byte[]{(byte) 255};
+ int[] xValues = new int[]{5, 32};
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf xData = intBuf(xValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData));
+
+ BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ bigIntVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bigIntVector);
+ eval.evaluate(batch, output);
+
+ assertTrue(bigIntVector.isNull(0));
+ assertTrue(bigIntVector.isNull(1));
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testTimeEquals() throws GandivaException, Exception { /*
+ * when isnotnull(x) then x
+ * else y
+ */
+ Field x = Field.nullable("x", new ArrowType.Time(TimeUnit.MILLISECOND, 32));
+ TreeNode xNode = TreeBuilder.makeField(x);
+
+ Field y = Field.nullable("y", new ArrowType.Time(TimeUnit.MILLISECOND, 32));
+ TreeNode yNode = TreeBuilder.makeField(y);
+
+ // if isnotnull(x) then x else y
+ TreeNode condition = TreeBuilder.makeFunction("isnotnull", Lists.newArrayList(xNode),
+ boolType);
+ TreeNode ifCoalesce = TreeBuilder.makeIf(
+ condition,
+ xNode,
+ yNode,
+ new ArrowType.Time(TimeUnit.MILLISECOND, 32));
+
+ ExpressionTree expr = TreeBuilder.makeExpression(ifCoalesce, x);
+ Schema schema = new Schema(Lists.newArrayList(x, y));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 2;
+ byte[] validity = new byte[]{(byte) 1};
+ byte[] yValidity = new byte[]{(byte) 3};
+ int[] xValues = new int[]{5, 1};
+ int[] yValues = new int[]{10, 2};
+ int[] expected = new int[]{5, 2};
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf xData = intBuf(xValues);
+
+ ArrowBuf yBufValidity = buf(yValidity);
+ ArrowBuf yData = intBuf(yValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch = new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(bufValidity, xData, yBufValidity, yData));
+
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(intVector);
+ eval.evaluate(batch, output);
+
+ // output should be 5 and 2
+ assertFalse(intVector.isNull(0));
+ assertEquals(expected[0], intVector.get(0));
+ assertEquals(expected[1], intVector.get(1));
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testIsNull() throws GandivaException, Exception {
+ Field x = Field.nullable("x", float64);
+
+ TreeNode xNode = TreeBuilder.makeField(x);
+ TreeNode isNull = TreeBuilder.makeFunction("isnull", Lists.newArrayList(xNode), boolType);
+ ExpressionTree expr = TreeBuilder.makeExpression(isNull, Field.nullable("result", boolType));
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ double[] xValues =
+ new double[]{
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0
+ };
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf xData = doubleBuf(xValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < 8; i++) {
+ assertFalse(bitVector.getObject(i).booleanValue());
+ }
+ for (int i = 8; i < numRows; i++) {
+ assertTrue(bitVector.getObject(i).booleanValue());
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testEquals() throws GandivaException, Exception {
+ Field c1 = Field.nullable("c1", int32);
+ Field c2 = Field.nullable("c2", int32);
+
+ TreeNode c1Node = TreeBuilder.makeField(c1);
+ TreeNode c2Node = TreeBuilder.makeField(c2);
+ TreeNode equals =
+ TreeBuilder.makeFunction("equal", Lists.newArrayList(c1Node, c2Node), boolType);
+ ExpressionTree expr = TreeBuilder.makeExpression(equals, Field.nullable("result", boolType));
+ Schema schema = new Schema(Lists.newArrayList(c1, c2));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ int[] c1Values = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] c2Values = new int[]{1, 2, 3, 4, 8, 7, 6, 5, 16, 15, 14, 13, 12, 11, 10, 9};
+
+ ArrowBuf c1Validity = buf(validity);
+ ArrowBuf c1Data = intBuf(c1Values);
+ ArrowBuf c2Validity = buf(validity);
+ ArrowBuf c2Data = intBuf(c2Values);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(c1Validity, c1Data, c2Validity, c2Data));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < 4; i++) {
+ assertTrue(bitVector.getObject(i).booleanValue());
+ }
+ for (int i = 4; i < 8; i++) {
+ assertFalse(bitVector.getObject(i).booleanValue());
+ }
+ for (int i = 8; i < 16; i++) {
+ assertTrue(bitVector.isNull(i));
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testInExpr() throws GandivaException, Exception {
+ Field c1 = Field.nullable("c1", int32);
+
+ TreeNode inExpr =
+ TreeBuilder.makeInExpressionInt32(TreeBuilder.makeField(c1), Sets.newHashSet(1, 2, 3, 4, 5, 15, 16));
+ ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType));
+ Schema schema = new Schema(Lists.newArrayList(c1));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ int[] c1Values = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+ ArrowBuf c1Validity = buf(validity);
+ ArrowBuf c1Data = intBuf(c1Values);
+ ArrowBuf c2Validity = buf(validity);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(c1Validity, c1Data, c2Validity));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < 5; i++) {
+ assertTrue(bitVector.getObject(i).booleanValue());
+ }
+ for (int i = 5; i < 16; i++) {
+ assertFalse(bitVector.getObject(i).booleanValue());
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testInExprDecimal() throws GandivaException, Exception {
+ Integer precision = 26;
+ Integer scale = 5;
+ ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale, 128);
+ Field c1 = Field.nullable("c1", decimal);
+
+ String[] values = new String[]{"1", "2", "3", "4"};
+ Set<BigDecimal> decimalSet = decimalSet(values, scale);
+ decimalSet.add(new BigDecimal(-0.0));
+ decimalSet.add(new BigDecimal(Long.MAX_VALUE));
+ decimalSet.add(new BigDecimal(Long.MIN_VALUE));
+ TreeNode inExpr =
+ TreeBuilder.makeInExpressionDecimal(TreeBuilder.makeField(c1),
+ decimalSet, precision, scale);
+ ExpressionTree expr = TreeBuilder.makeExpression(inExpr,
+ Field.nullable("result", boolType));
+ Schema schema = new Schema(Lists.newArrayList(c1));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ String[] c1Values =
+ new String[]{"1", "2", "3", "4", "-0.0", "6", "7", "8", "9", "10", "11", "12", "13", "14",
+ String.valueOf(Long.MAX_VALUE),
+ String.valueOf(Long.MIN_VALUE)};
+
+ DecimalVector c1Data = decimalVector(c1Values, precision, scale);
+ ArrowBuf c1Validity = buf(validity);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(c1Validity, c1Data.getDataBuffer(), c1Data.getValidityBuffer()));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < 5; i++) {
+ assertTrue(bitVector.getObject(i).booleanValue());
+ }
+ for (int i = 5; i < 16; i++) {
+ assertFalse(bitVector.getObject(i).booleanValue());
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testInExprDouble() throws GandivaException, Exception {
+ Field c1 = Field.nullable("c1", float64);
+
+ TreeNode inExpr =
+ TreeBuilder.makeInExpressionDouble(TreeBuilder.makeField(c1),
+ Sets.newHashSet(1.0, -0.0, 3.0, 4.0, Double.NaN,
+ Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
+ ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType));
+ Schema schema = new Schema(Lists.newArrayList(c1));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ // Create a row-batch with some sample data to look for
+ int numRows = 16;
+ // Only the first 8 values will be valid.
+ byte[] validity = new byte[]{(byte) 255, 0};
+ double[] c1Values = new double[]{1, -0.0, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.NaN,
+ 6, 7, 8, 9, 10, 11, 12, 13, 14, 4, 3};
+
+ ArrowBuf c1Validity = buf(validity);
+ ArrowBuf c1Data = doubleBuf(c1Values);
+ ArrowBuf c2Validity = buf(validity);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(c1Validity, c1Data, c2Validity));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ // The first four values in the vector must match the expression, but not the other ones.
+ for (int i = 0; i < 4; i++) {
+ assertTrue(bitVector.getObject(i).booleanValue());
+ }
+ for (int i = 4; i < 16; i++) {
+ assertFalse(bitVector.getObject(i).booleanValue());
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testInExprStrings() throws GandivaException, Exception {
+ Field c1 = Field.nullable("c1", new ArrowType.Utf8());
+
+ TreeNode l1 = TreeBuilder.makeLiteral(1L);
+ TreeNode l2 = TreeBuilder.makeLiteral(3L);
+ List<TreeNode> args = Lists.newArrayList(TreeBuilder.makeField(c1), l1, l2);
+ TreeNode substr = TreeBuilder.makeFunction("substr", args, new ArrowType.Utf8());
+ TreeNode inExpr =
+ TreeBuilder.makeInExpressionString(substr, Sets.newHashSet("one", "two", "thr", "fou"));
+ ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType));
+ Schema schema = new Schema(Lists.newArrayList(c1));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ String[] c1Values = new String[]{"one", "two", "three", "four", "five", "six", "seven",
+ "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+ "sixteen"};
+
+ ArrowBuf c1Validity = buf(validity);
+ List<ArrowBuf> dataBufsX = stringBufs(c1Values);
+ ArrowBuf c2Validity = buf(validity);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(c1Validity, dataBufsX.get(0), dataBufsX.get(1), c2Validity));
+
+ BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ bitVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(bitVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < 4; i++) {
+ assertTrue(bitVector.getObject(i).booleanValue());
+ }
+ for (int i = 5; i < 16; i++) {
+ assertFalse(bitVector.getObject(i).booleanValue());
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testSmallOutputVectors() throws GandivaException, Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Field retType = Field.nullable("c", int32);
+ ExpressionTree root = TreeBuilder.makeExpression("add", args, retType);
+
+ List<ExpressionTree> exprs = Lists.newArrayList(root);
+
+ Schema schema = new Schema(args);
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // second half is "undefined"
+ int[] aValues = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] bValues = new int[]{16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+
+ ArrowBuf aValidity = buf(validity);
+ ArrowBuf aData = intBuf(aValues);
+ ArrowBuf bValidity = buf(validity);
+ ArrowBuf b2Validity = buf(validity);
+ ArrowBuf bData = intBuf(bValues);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 8), new ArrowFieldNode(numRows, 8)),
+ Lists.newArrayList(aValidity, aData, bValidity, bData, b2Validity));
+
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(intVector);
+ try {
+ eval.evaluate(batch, output);
+ } catch (Throwable t) {
+ intVector.allocateNew(numRows);
+ eval.evaluate(batch, output);
+ }
+
+ for (int i = 0; i < 8; i++) {
+ assertFalse(intVector.isNull(i));
+ assertEquals(17, intVector.get(i));
+ }
+ for (int i = 8; i < 16; i++) {
+ assertTrue(intVector.isNull(i));
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testDateTime() throws GandivaException, Exception {
+ ArrowType date64 = new ArrowType.Date(DateUnit.MILLISECOND);
+ // ArrowType time32 = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ ArrowType timeStamp = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "TZ");
+
+ Field dateField = Field.nullable("date", date64);
+ // Field timeField = Field.nullable("time", time32);
+ Field tsField = Field.nullable("timestamp", timeStamp);
+
+ TreeNode dateNode = TreeBuilder.makeField(dateField);
+ TreeNode tsNode = TreeBuilder.makeField(tsField);
+
+ List<TreeNode> dateArgs = Lists.newArrayList(dateNode);
+ TreeNode dateToYear = TreeBuilder.makeFunction("extractYear", dateArgs, int64);
+ TreeNode dateToMonth = TreeBuilder.makeFunction("extractMonth", dateArgs, int64);
+ TreeNode dateToDay = TreeBuilder.makeFunction("extractDay", dateArgs, int64);
+ TreeNode dateToHour = TreeBuilder.makeFunction("extractHour", dateArgs, int64);
+ TreeNode dateToMin = TreeBuilder.makeFunction("extractMinute", dateArgs, int64);
+
+ List<TreeNode> tsArgs = Lists.newArrayList(tsNode);
+ TreeNode tsToYear = TreeBuilder.makeFunction("extractYear", tsArgs, int64);
+ TreeNode tsToMonth = TreeBuilder.makeFunction("extractMonth", tsArgs, int64);
+ TreeNode tsToDay = TreeBuilder.makeFunction("extractDay", tsArgs, int64);
+ TreeNode tsToHour = TreeBuilder.makeFunction("extractHour", tsArgs, int64);
+ TreeNode tsToMin = TreeBuilder.makeFunction("extractMinute", tsArgs, int64);
+
+ Field resultField = Field.nullable("result", int64);
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(dateToYear, resultField),
+ TreeBuilder.makeExpression(dateToMonth, resultField),
+ TreeBuilder.makeExpression(dateToDay, resultField),
+ TreeBuilder.makeExpression(dateToHour, resultField),
+ TreeBuilder.makeExpression(dateToMin, resultField),
+ TreeBuilder.makeExpression(tsToYear, resultField),
+ TreeBuilder.makeExpression(tsToMonth, resultField),
+ TreeBuilder.makeExpression(tsToDay, resultField),
+ TreeBuilder.makeExpression(tsToHour, resultField),
+ TreeBuilder.makeExpression(tsToMin, resultField));
+
+ Schema schema = new Schema(Lists.newArrayList(dateField, tsField));
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 8;
+ byte[] validity = new byte[]{(byte) 255};
+ String[] values =
+ new String[]{
+ "2007-01-01T01:00:00.00Z",
+ "2007-03-05T03:40:00.00Z",
+ "2008-05-31T13:55:00.00Z",
+ "2000-06-30T23:20:00.00Z",
+ "2000-07-10T20:30:00.00Z",
+ "2000-08-20T00:14:00.00Z",
+ "2000-09-30T02:29:00.00Z",
+ "2000-10-31T05:33:00.00Z"
+ };
+ long[] expYearFromDate = new long[]{2007, 2007, 2008, 2000, 2000, 2000, 2000, 2000};
+ long[] expMonthFromDate = new long[]{1, 3, 5, 6, 7, 8, 9, 10};
+ long[] expDayFromDate = new long[]{1, 5, 31, 30, 10, 20, 30, 31};
+ long[] expHourFromDate = new long[]{1, 3, 13, 23, 20, 0, 2, 5};
+ long[] expMinFromDate = new long[]{0, 40, 55, 20, 30, 14, 29, 33};
+
+ long[][] expValues =
+ new long[][]{
+ expYearFromDate, expMonthFromDate, expDayFromDate, expHourFromDate, expMinFromDate
+ };
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf millisData = stringToMillis(values);
+ ArrowBuf buf2Validity = buf(validity);
+ ArrowBuf millis2Data = stringToMillis(values);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(bufValidity, millisData, buf2Validity, millis2Data));
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ for (int i = 0; i < exprs.size(); i++) {
+ BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ bigIntVector.allocateNew(numRows);
+ output.add(bigIntVector);
+ }
+ eval.evaluate(batch, output);
+ eval.close();
+
+ for (int i = 0; i < output.size(); i++) {
+ long[] expected = expValues[i % 5];
+ BigIntVector bigIntVector = (BigIntVector) output.get(i);
+
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(bigIntVector.isNull(j));
+ assertEquals(expected[j], bigIntVector.get(j));
+ }
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test
+ public void testDateTrunc() throws Exception {
+ ArrowType date64 = new ArrowType.Date(DateUnit.MILLISECOND);
+ Field dateField = Field.nullable("date", date64);
+
+ TreeNode dateNode = TreeBuilder.makeField(dateField);
+
+ List<TreeNode> dateArgs = Lists.newArrayList(dateNode);
+ TreeNode dateToYear = TreeBuilder.makeFunction("date_trunc_Year", dateArgs, date64);
+ TreeNode dateToMonth = TreeBuilder.makeFunction("date_trunc_Month", dateArgs, date64);
+
+ Field resultField = Field.nullable("result", date64);
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(dateToYear, resultField),
+ TreeBuilder.makeExpression(dateToMonth, resultField));
+
+ Schema schema = new Schema(Lists.newArrayList(dateField));
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 4;
+ byte[] validity = new byte[]{(byte) 255};
+ String[] values = new String[]{
+ "2007-01-01T01:00:00.00Z",
+ "2007-03-05T03:40:00.00Z",
+ "2008-05-31T13:55:00.00Z",
+ "2000-06-30T23:20:00.00Z",
+ };
+ String[] expYearFromDate = new String[]{
+ "2007-01-01T00:00:00.00Z",
+ "2007-01-01T00:00:00.00Z",
+ "2008-01-01T00:00:00.00Z",
+ "2000-01-01T00:00:00.00Z",
+ };
+ String[] expMonthFromDate = new String[]{
+ "2007-01-01T00:00:00.00Z",
+ "2007-03-01T00:00:00.00Z",
+ "2008-05-01T00:00:00.00Z",
+ "2000-06-01T00:00:00.00Z",
+ };
+
+ String[][] expValues = new String[][]{ expYearFromDate, expMonthFromDate};
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf millisData = stringToMillis(values);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(bufValidity, millisData));
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ for (int i = 0; i < exprs.size(); i++) {
+ BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ bigIntVector.allocateNew(numRows);
+ output.add(bigIntVector);
+ }
+ eval.evaluate(batch, output);
+ eval.close();
+
+ for (int i = 0; i < output.size(); i++) {
+ String[] expected = expValues[i];
+ BigIntVector bigIntVector = (BigIntVector) output.get(i);
+
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(bigIntVector.isNull(j));
+ assertEquals(Instant.parse(expected[j]).toEpochMilli(), bigIntVector.get(j));
+ }
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test
+ public void testUnknownFunction() {
+ Field c1 = Field.nullable("c1", int8);
+ Field c2 = Field.nullable("c2", int8);
+
+ TreeNode c1Node = TreeBuilder.makeField(c1);
+ TreeNode c2Node = TreeBuilder.makeField(c2);
+
+ TreeNode unknown =
+ TreeBuilder.makeFunction("xxx_yyy", Lists.newArrayList(c1Node, c2Node), int8);
+ ExpressionTree expr = TreeBuilder.makeExpression(unknown, Field.nullable("result", int8));
+ Schema schema = new Schema(Lists.newArrayList(c1, c2));
+ boolean caughtException = false;
+ try {
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+ } catch (GandivaException ge) {
+ caughtException = true;
+ }
+
+ assertTrue(caughtException);
+ }
+
+ @Test
+ public void testCastTimestampToString() throws Exception {
+ ArrowType timeStamp = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "TZ");
+
+ Field tsField = Field.nullable("timestamp", timeStamp);
+ Field lenField = Field.nullable("outLength", int64);
+
+ TreeNode tsNode = TreeBuilder.makeField(tsField);
+ TreeNode lenNode = TreeBuilder.makeField(lenField);
+
+ TreeNode tsToString = TreeBuilder.makeFunction("castVARCHAR", Lists.newArrayList(tsNode, lenNode),
+ new ArrowType.Utf8());
+
+ Field resultField = Field.nullable("result", new ArrowType.Utf8());
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(tsToString, resultField));
+
+ Schema schema = new Schema(Lists.newArrayList(tsField, lenField));
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255};
+ String[] values =
+ new String[] {
+ "0007-01-01T01:00:00Z",
+ "2007-03-05T03:40:00Z",
+ "2008-05-31T13:55:00Z",
+ "2000-06-30T23:20:00Z",
+ "2000-07-10T20:30:00Z",
+ };
+ long[] lenValues =
+ new long[] {
+ 23L, 24L, 22L, 0L, 4L
+ };
+
+ String[] expValues =
+ new String[] {
+ "0007-01-01 01:00:00.000",
+ "2007-03-05 03:40:00.000",
+ "2008-05-31 13:55:00.00",
+ "",
+ "2000",
+ };
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf millisData = stringToMillis(values);
+ ArrowBuf lenValidity = buf(validity);
+ ArrowBuf lenData = longBuf(lenValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(bufValidity, millisData, lenValidity, lenData));
+
+ List<ValueVector> output = new ArrayList<>();
+ for (int i = 0; i < exprs.size(); i++) {
+ VarCharVector charVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+
+ charVector.allocateNew(numRows * 23, numRows);
+ output.add(charVector);
+ }
+ eval.evaluate(batch, output);
+ eval.close();
+
+ for (ValueVector valueVector : output) {
+ VarCharVector charVector = (VarCharVector) valueVector;
+
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(charVector.isNull(j));
+ assertEquals(expValues[j], new String(charVector.get(j)));
+ }
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test
+ public void testCastDayIntervalToBigInt() throws Exception {
+ ArrowType dayIntervalType = new ArrowType.Interval(IntervalUnit.DAY_TIME);
+
+ Field dayIntervalField = Field.nullable("dayInterval", dayIntervalType);
+
+ TreeNode intervalNode = TreeBuilder.makeField(dayIntervalField);
+
+ TreeNode intervalToBigint = TreeBuilder.makeFunction("castBIGINT", Lists.newArrayList(intervalNode), int64);
+
+ Field resultField = Field.nullable("result", int64);
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(intervalToBigint, resultField));
+
+ Schema schema = new Schema(Lists.newArrayList(dayIntervalField));
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 5;
+ byte[] validity = new byte[]{(byte) 255};
+ String[] values =
+ new String[]{
+ "1 0", // "days millis"
+ "2 0",
+ "1 1",
+ "10 5000",
+ "11 86400001",
+ };
+
+ Long[] expValues =
+ new Long[]{
+ 86400000L,
+ 2 * 86400000L,
+ 86400000L + 1L,
+ 10 * 86400000L + 5000L,
+ 11 * 86400000L + 86400001L
+ };
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf intervalsData = stringToDayInterval(values);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(bufValidity, intervalsData));
+
+ List<ValueVector> output = new ArrayList<>();
+ for (int i = 0; i < exprs.size(); i++) {
+ BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ bigIntVector.allocateNew(numRows);
+ output.add(bigIntVector);
+ }
+ eval.evaluate(batch, output);
+ eval.close();
+
+ for (ValueVector valueVector : output) {
+ BigIntVector bigintVector = (BigIntVector) valueVector;
+
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(bigintVector.isNull(j));
+ assertEquals(expValues[j], Long.valueOf(bigintVector.get(j)));
+ }
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test
+ public void testCaseInsensitiveFunctions() throws Exception {
+ ArrowType timeStamp = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "TZ");
+
+ Field tsField = Field.nullable("timestamp", timeStamp);
+
+ TreeNode tsNode = TreeBuilder.makeField(tsField);
+
+ TreeNode extractday = TreeBuilder.makeFunction("extractday", Lists.newArrayList(tsNode),
+ int64);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(extractday, Field.nullable("result", int64));
+ Schema schema = new Schema(Lists.newArrayList(tsField));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255};
+ String[] values =
+ new String[] {
+ "0007-01-01T01:00:00Z",
+ "2007-03-05T03:40:00Z",
+ "2008-05-31T13:55:00Z",
+ "2000-06-30T23:20:00Z",
+ "2000-07-10T20:30:00Z",
+ };
+
+ long[] expValues =
+ new long[] {
+ 1, 5, 31, 30, 10
+ };
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf millisData = stringToMillis(values);
+
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(bufValidity, millisData));
+
+ List<ValueVector> output = new ArrayList<>();
+ BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ bigIntVector.allocateNew(numRows);
+ output.add(bigIntVector);
+
+ eval.evaluate(batch, output);
+ eval.close();
+
+ for (ValueVector valueVector : output) {
+ BigIntVector vector = (BigIntVector) valueVector;
+
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(vector.isNull(j));
+ assertEquals(expValues[j], vector.get(j));
+ }
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test
+ public void testCastInt() throws Exception {
+ Field inField = Field.nullable("input", new ArrowType.Utf8());
+ TreeNode inNode = TreeBuilder.makeField(inField);
+ TreeNode castINTFn = TreeBuilder.makeFunction("castINT", Lists.newArrayList(inNode),
+ int32);
+ Field resultField = Field.nullable("result", int32);
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(castINTFn, resultField));
+ Schema schema = new Schema(Lists.newArrayList(inField));
+ Projector eval = Projector.make(schema, exprs);
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255};
+ String[] values =
+ new String[] {
+ "0", "123", "-123", "-1", "1"
+ };
+ int[] expValues =
+ new int[] {
+ 0, 123, -123, -1, 1
+ };
+ ArrowBuf bufValidity = buf(validity);
+ List<ArrowBuf> bufData = stringBufs(values);
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1)));
+ List<ValueVector> output = new ArrayList<>();
+ for (int i = 0; i < exprs.size(); i++) {
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRows);
+ output.add(intVector);
+ }
+ eval.evaluate(batch, output);
+ eval.close();
+ for (ValueVector valueVector : output) {
+ IntVector intVector = (IntVector) valueVector;
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(intVector.isNull(j));
+ assertTrue(expValues[j] == intVector.get(j));
+ }
+ }
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test(expected = GandivaException.class)
+ public void testCastIntInvalidValue() throws Exception {
+ Field inField = Field.nullable("input", new ArrowType.Utf8());
+ TreeNode inNode = TreeBuilder.makeField(inField);
+ TreeNode castINTFn = TreeBuilder.makeFunction("castINT", Lists.newArrayList(inNode),
+ int32);
+ Field resultField = Field.nullable("result", int32);
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(castINTFn, resultField));
+ Schema schema = new Schema(Lists.newArrayList(inField));
+ Projector eval = Projector.make(schema, exprs);
+ int numRows = 1;
+ byte[] validity = new byte[] {(byte) 255};
+ String[] values =
+ new String[] {
+ "abc"
+ };
+ ArrowBuf bufValidity = buf(validity);
+ List<ArrowBuf> bufData = stringBufs(values);
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1)));
+ List<ValueVector> output = new ArrayList<>();
+ for (int i = 0; i < exprs.size(); i++) {
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRows);
+ output.add(intVector);
+ }
+ try {
+ eval.evaluate(batch, output);
+ } finally {
+ eval.close();
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+ }
+
+ @Test
+ public void testCastFloat() throws Exception {
+ Field inField = Field.nullable("input", new ArrowType.Utf8());
+ TreeNode inNode = TreeBuilder.makeField(inField);
+ TreeNode castFLOAT8Fn = TreeBuilder.makeFunction("castFLOAT8", Lists.newArrayList(inNode),
+ float64);
+ Field resultField = Field.nullable("result", float64);
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(castFLOAT8Fn, resultField));
+ Schema schema = new Schema(Lists.newArrayList(inField));
+ Projector eval = Projector.make(schema, exprs);
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255};
+ String[] values =
+ new String[] {
+ "2.3",
+ "-11.11",
+ "0",
+ "111",
+ "12345.67"
+ };
+ double[] expValues =
+ new double[] {
+ 2.3, -11.11, 0, 111, 12345.67
+ };
+ ArrowBuf bufValidity = buf(validity);
+ List<ArrowBuf> bufData = stringBufs(values);
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1)));
+ List<ValueVector> output = new ArrayList<>();
+ for (int i = 0; i < exprs.size(); i++) {
+ Float8Vector float8Vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator);
+ float8Vector.allocateNew(numRows);
+ output.add(float8Vector);
+ }
+ eval.evaluate(batch, output);
+ eval.close();
+ for (ValueVector valueVector : output) {
+ Float8Vector float8Vector = (Float8Vector) valueVector;
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(float8Vector.isNull(j));
+ assertTrue(expValues[j] == float8Vector.get(j));
+ }
+ }
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test
+ public void testCastFloatVarbinary() throws Exception {
+ Field inField = Field.nullable("input", new ArrowType.Binary());
+ TreeNode inNode = TreeBuilder.makeField(inField);
+ TreeNode castFLOAT8Fn = TreeBuilder.makeFunction("castFLOAT8", Lists.newArrayList(inNode),
+ float64);
+ Field resultField = Field.nullable("result", float64);
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(castFLOAT8Fn, resultField));
+ Schema schema = new Schema(Lists.newArrayList(inField));
+ Projector eval = Projector.make(schema, exprs);
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255};
+ String[] values =
+ new String[] {
+ "2.3",
+ "-11.11",
+ "0",
+ "111",
+ "12345.67"
+ };
+ double[] expValues =
+ new double[] {
+ 2.3, -11.11, 0, 111, 12345.67
+ };
+ ArrowBuf bufValidity = buf(validity);
+ List<ArrowBuf> bufData = stringBufs(values);
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1)));
+ List<ValueVector> output = new ArrayList<>();
+ for (int i = 0; i < exprs.size(); i++) {
+ Float8Vector float8Vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator);
+ float8Vector.allocateNew(numRows);
+ output.add(float8Vector);
+ }
+ eval.evaluate(batch, output);
+ eval.close();
+ for (ValueVector valueVector : output) {
+ Float8Vector float8Vector = (Float8Vector) valueVector;
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(float8Vector.isNull(j));
+ assertTrue(expValues[j] == float8Vector.get(j));
+ }
+ }
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test(expected = GandivaException.class)
+ public void testCastFloatInvalidValue() throws Exception {
+ Field inField = Field.nullable("input", new ArrowType.Utf8());
+ TreeNode inNode = TreeBuilder.makeField(inField);
+ TreeNode castFLOAT8Fn = TreeBuilder.makeFunction("castFLOAT8", Lists.newArrayList(inNode),
+ float64);
+ Field resultField = Field.nullable("result", float64);
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(castFLOAT8Fn, resultField));
+ Schema schema = new Schema(Lists.newArrayList(inField));
+ Projector eval = Projector.make(schema, exprs);
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255};
+ String[] values =
+ new String[] {
+ "2.3",
+ "-11.11",
+ "abc",
+ "111",
+ "12345.67"
+ };
+ ArrowBuf bufValidity = buf(validity);
+ List<ArrowBuf> bufData = stringBufs(values);
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode),
+ Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1)));
+ List<ValueVector> output = new ArrayList<>();
+ for (int i = 0; i < exprs.size(); i++) {
+ Float8Vector float8Vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator);
+ float8Vector.allocateNew(numRows);
+ output.add(float8Vector);
+ }
+ try {
+ eval.evaluate(batch, output);
+ } finally {
+ eval.close();
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+ }
+
+ @Test
+ public void testEvaluateWithUnsetTargetHostCPU() throws Exception {
+ Field a = Field.nullable("a", int32);
+ Field b = Field.nullable("b", int32);
+ List<Field> args = Lists.newArrayList(a, b);
+
+ Field retType = Field.nullable("c", int32);
+ ExpressionTree root = TreeBuilder.makeExpression("add", args, retType);
+
+ List<ExpressionTree> exprs = Lists.newArrayList(root);
+
+ Schema schema = new Schema(args);
+ Projector eval = Projector.make(schema, exprs, new ConfigurationBuilder.ConfigOptions().withTargetCPU(false ));
+
+ int numRows = 16;
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // second half is "undefined"
+ int[] aValues = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ int[] bValues = new int[]{16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+
+ ArrowBuf validitya = buf(validity);
+ ArrowBuf valuesa = intBuf(aValues);
+ ArrowBuf validityb = buf(validity);
+ ArrowBuf valuesb = intBuf(bValues);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 8), new ArrowFieldNode(numRows, 8)),
+ Lists.newArrayList(validitya, valuesa, validityb, valuesb));
+
+ IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ intVector.allocateNew(numRows);
+
+ List<ValueVector> output = new ArrayList<ValueVector>();
+ output.add(intVector);
+ eval.evaluate(batch, output);
+
+ for (int i = 0; i < 8; i++) {
+ assertFalse(intVector.isNull(i));
+ assertEquals(17, intVector.get(i));
+ }
+ for (int i = 8; i < 16; i++) {
+ assertTrue(intVector.isNull(i));
+ }
+
+ // free buffers
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ eval.close();
+ }
+
+ @Test
+ public void testCastVarcharFromInteger() throws Exception {
+ Field inField = Field.nullable("input", int32);
+ Field lenField = Field.nullable("outLength", int64);
+
+ TreeNode inNode = TreeBuilder.makeField(inField);
+ TreeNode lenNode = TreeBuilder.makeField(lenField);
+
+ TreeNode tsToString = TreeBuilder.makeFunction("castVARCHAR", Lists.newArrayList(inNode, lenNode),
+ new ArrowType.Utf8());
+
+ Field resultField = Field.nullable("result", new ArrowType.Utf8());
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(tsToString, resultField));
+
+ Schema schema = new Schema(Lists.newArrayList(inField, lenField));
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255};
+ int[] values =
+ new int[] {
+ 2345,
+ 2345,
+ 2345,
+ 2345,
+ -2345,
+ };
+ long[] lenValues =
+ new long[] {
+ 0L, 4L, 2L, 6L, 5L
+ };
+
+ String[] expValues =
+ new String[] {
+ "",
+ Integer.toString(2345).substring(0, 4),
+ Integer.toString(2345).substring(0, 2),
+ Integer.toString(2345),
+ Integer.toString(-2345)
+ };
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf bufData = intBuf(values);
+ ArrowBuf lenValidity = buf(validity);
+ ArrowBuf lenData = longBuf(lenValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(bufValidity, bufData, lenValidity, lenData));
+
+ List<ValueVector> output = new ArrayList<>();
+ for (int i = 0; i < exprs.size(); i++) {
+ VarCharVector charVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+
+ charVector.allocateNew(numRows * 5, numRows);
+ output.add(charVector);
+ }
+ eval.evaluate(batch, output);
+ eval.close();
+
+ for (ValueVector valueVector : output) {
+ VarCharVector charVector = (VarCharVector) valueVector;
+
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(charVector.isNull(j));
+ assertEquals(expValues[j], new String(charVector.get(j)));
+ }
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test
+ public void testCastVarcharFromFloat() throws Exception {
+ Field inField = Field.nullable("input", float64);
+ Field lenField = Field.nullable("outLength", int64);
+
+ TreeNode inNode = TreeBuilder.makeField(inField);
+ TreeNode lenNode = TreeBuilder.makeField(lenField);
+
+ TreeNode tsToString = TreeBuilder.makeFunction("castVARCHAR", Lists.newArrayList(inNode, lenNode),
+ new ArrowType.Utf8());
+
+ Field resultField = Field.nullable("result", new ArrowType.Utf8());
+ List<ExpressionTree> exprs =
+ Lists.newArrayList(
+ TreeBuilder.makeExpression(tsToString, resultField));
+
+ Schema schema = new Schema(Lists.newArrayList(inField, lenField));
+ Projector eval = Projector.make(schema, exprs);
+
+ int numRows = 5;
+ byte[] validity = new byte[] {(byte) 255};
+ double[] values =
+ new double[] {
+ 0.0,
+ -0.0,
+ 1.0,
+ 0.001,
+ 0.0009,
+ 0.00099893,
+ 999999.9999,
+ 10000000.0,
+ 23943410000000.343434,
+ Double.POSITIVE_INFINITY,
+ Double.NEGATIVE_INFINITY,
+ Double.NaN,
+ 23.45,
+ 23.45,
+ -23.45,
+ };
+ long[] lenValues =
+ new long[] {
+ 6L, 6L, 6L, 6L, 10L, 15L, 15L, 15L, 30L,
+ 15L, 15L, 15L, 0L, 6L, 6L
+ };
+
+ /* The Java real numbers are represented in two ways and Gandiva must
+ * follow the same rules:
+ * - If the number is greater or equals than 10^7 and less than 10^(-3)
+ * it will be represented using scientific notation, e.g:
+ * - 0.000012 -> 1.2E-5
+ * - 10000002.3 -> 1.00000023E7
+ * - If the numbers are between that interval above, they are showed as is.
+ *
+ * The test checks if the Gandiva function casts the number with the same notation of the
+ * Java.
+ * */
+ String[] expValues =
+ new String[] {
+ Double.toString(0.0), // must be cast to -> "0.0"
+ Double.toString(-0.0), // must be cast to -> "-0.0"
+ Double.toString(1.0), // must be cast to -> "1.0"
+ Double.toString(0.001), // must be cast to -> "0.001"
+ Double.toString(0.0009), // must be cast to -> "9E-4"
+ Double.toString(0.00099893), // must be cast to -> "9E-4"
+ Double.toString(999999.9999), // must be cast to -> "999999.9999"
+ Double.toString(10000000.0), // must be cast to 1E7
+ Double.toString(23943410000000.343434),
+ Double.toString(Double.POSITIVE_INFINITY),
+ Double.toString(Double.NEGATIVE_INFINITY),
+ Double.toString(Double.NaN),
+ "",
+ Double.toString(23.45),
+ Double.toString(-23.45)
+ };
+
+ ArrowBuf bufValidity = buf(validity);
+ ArrowBuf bufData = doubleBuf(values);
+ ArrowBuf lenValidity = buf(validity);
+ ArrowBuf lenData = longBuf(lenValues);
+
+ ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(fieldNode, fieldNode),
+ Lists.newArrayList(bufValidity, bufData, lenValidity, lenData));
+
+ List<ValueVector> output = new ArrayList<>();
+ for (int i = 0; i < exprs.size(); i++) {
+ VarCharVector charVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+
+ charVector.allocateNew(numRows * 5, numRows);
+ output.add(charVector);
+ }
+ eval.evaluate(batch, output);
+ eval.close();
+
+ for (ValueVector valueVector : output) {
+ VarCharVector charVector = (VarCharVector) valueVector;
+
+ for (int j = 0; j < numRows; j++) {
+ assertFalse(charVector.isNull(j));
+ assertEquals(expValues[j], new String(charVector.get(j)));
+ }
+ }
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+
+ @Test
+ public void testInitCap() throws Exception {
+
+ Field x = Field.nullable("x", new ArrowType.Utf8());
+
+ Field retType = Field.nullable("c", new ArrowType.Utf8());
+
+ TreeNode cond =
+ TreeBuilder.makeFunction(
+ "initcap",
+ Lists.newArrayList(TreeBuilder.makeField(x)),
+ new ArrowType.Utf8());
+ ExpressionTree expr = TreeBuilder.makeExpression(cond, retType);
+ Schema schema = new Schema(Lists.newArrayList(x));
+ Projector eval = Projector.make(schema, Lists.newArrayList(expr));
+
+ int numRows = 5;
+ byte[] validity = new byte[]{(byte) 15, 0};
+ String[] valuesX = new String[]{
+ " øhpqršvñ \n\n",
+ "möbelträger1füße \nmöbelträge'rfüße",
+ "ÂbĆDËFgh\néll",
+ "citroën CaR",
+ "kjk"
+ };
+
+ String[] expected = new String[]{
+ " Øhpqršvñ \n\n",
+ "Möbelträger1füße \nMöbelträge'Rfüße",
+ "Âbćdëfgh\nÉll",
+ "Citroën Car",
+ null
+ };
+
+ ArrowBuf validityX = buf(validity);
+ List<ArrowBuf> dataBufsX = stringBufs(valuesX);
+
+ ArrowRecordBatch batch =
+ new ArrowRecordBatch(
+ numRows,
+ Lists.newArrayList(new ArrowFieldNode(numRows, 0)),
+ Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1)));
+
+ // allocate data for output vector.
+ VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+ outVector.allocateNew(numRows * 100, numRows);
+
+ // evaluate expression
+ List<ValueVector> output = new ArrayList<>();
+ output.add(outVector);
+ eval.evaluate(batch, output);
+ eval.close();
+
+ // match expected output.
+ for (int i = 0; i < numRows - 1; i++) {
+ assertFalse("Expect none value equals null", outVector.isNull(i));
+ assertEquals(expected[i], new String(outVector.get(i)));
+ }
+
+ assertTrue("Last value must be null", outVector.isNull(numRows - 1));
+
+ releaseRecordBatch(batch);
+ releaseValueVectors(output);
+ }
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/TestJniLoader.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/TestJniLoader.java
new file mode 100644
index 000000000..116f0dd9e
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/TestJniLoader.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestJniLoader {
+
+ @Test
+ public void testDefaultConfiguration() throws Exception {
+ long configId = JniLoader.getConfiguration(ConfigurationBuilder.ConfigOptions.getDefault());
+ Assert.assertEquals(configId, JniLoader.getDefaultConfiguration());
+ Assert.assertEquals(configId, JniLoader.getConfiguration(ConfigurationBuilder.ConfigOptions.getDefault()));
+
+ long configId2 = JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions().withOptimize(false));
+ long configId3 = JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions().withTargetCPU(false));
+ long configId4 = JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions().withOptimize(false)
+ .withTargetCPU(false));
+
+ Assert.assertTrue(configId != configId2 && configId2 != configId3 && configId3 != configId4);
+
+ Assert.assertEquals(configId2, JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions()
+ .withOptimize(false)));
+ Assert.assertEquals(configId3, JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions()
+ .withTargetCPU(false)));
+ Assert.assertEquals(configId4, JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions()
+ .withOptimize(false).withTargetCPU(false)));
+
+ JniLoader.removeConfiguration(new ConfigurationBuilder.ConfigOptions().withOptimize(false));
+ // configids are monotonically updated. after a config is removed, new one is assigned with higher id
+ Assert.assertNotEquals(configId2, JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions()
+ .withOptimize(false)));
+
+ JniLoader.removeConfiguration(new ConfigurationBuilder.ConfigOptions());
+ Assert.assertNotEquals(configId, JniLoader.getConfiguration(ConfigurationBuilder.ConfigOptions.getDefault()));
+ }
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/ArrowTypeHelperTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/ArrowTypeHelperTest.java
new file mode 100644
index 000000000..7ddd602bf
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/ArrowTypeHelperTest.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+public class ArrowTypeHelperTest {
+
+ private void testInt(int width, boolean isSigned, int expected) throws GandivaException {
+ ArrowType arrowType = new ArrowType.Int(width, isSigned);
+ GandivaTypes.ExtGandivaType gandivaType = ArrowTypeHelper.arrowTypeToProtobuf(arrowType);
+ assertEquals(expected, gandivaType.getType().getNumber());
+ }
+
+ @Test
+ public void testAllInts() throws GandivaException {
+ testInt(8, false, GandivaTypes.GandivaType.UINT8_VALUE);
+ testInt(8, true, GandivaTypes.GandivaType.INT8_VALUE);
+ testInt(16, false, GandivaTypes.GandivaType.UINT16_VALUE);
+ testInt(16, true, GandivaTypes.GandivaType.INT16_VALUE);
+ testInt(32, false, GandivaTypes.GandivaType.UINT32_VALUE);
+ testInt(32, true, GandivaTypes.GandivaType.INT32_VALUE);
+ testInt(64, false, GandivaTypes.GandivaType.UINT64_VALUE);
+ testInt(64, true, GandivaTypes.GandivaType.INT64_VALUE);
+ }
+
+ private void testFloat(FloatingPointPrecision precision, int expected) throws GandivaException {
+ ArrowType arrowType = new ArrowType.FloatingPoint(precision);
+ GandivaTypes.ExtGandivaType gandivaType = ArrowTypeHelper.arrowTypeToProtobuf(arrowType);
+ assertEquals(expected, gandivaType.getType().getNumber());
+ }
+
+ @Test
+ public void testAllFloats() throws GandivaException {
+ testFloat(FloatingPointPrecision.HALF, GandivaTypes.GandivaType.HALF_FLOAT_VALUE);
+ testFloat(FloatingPointPrecision.SINGLE, GandivaTypes.GandivaType.FLOAT_VALUE);
+ testFloat(FloatingPointPrecision.DOUBLE, GandivaTypes.GandivaType.DOUBLE_VALUE);
+ }
+
+ private void testBasic(ArrowType arrowType, int expected) throws GandivaException {
+ GandivaTypes.ExtGandivaType gandivaType = ArrowTypeHelper.arrowTypeToProtobuf(arrowType);
+ assertEquals(expected, gandivaType.getType().getNumber());
+ }
+
+ @Test
+ public void testSimpleTypes() throws GandivaException {
+ testBasic(new ArrowType.Bool(), GandivaTypes.GandivaType.BOOL_VALUE);
+ testBasic(new ArrowType.Binary(), GandivaTypes.GandivaType.BINARY_VALUE);
+ testBasic(new ArrowType.Utf8(), GandivaTypes.GandivaType.UTF8_VALUE);
+ }
+
+ @Test
+ public void testField() throws GandivaException {
+ Field field = Field.nullable("col1", new ArrowType.Bool());
+ GandivaTypes.Field f = ArrowTypeHelper.arrowFieldToProtobuf(field);
+ assertEquals(field.getName(), f.getName());
+ assertEquals(true, f.getNullable());
+ assertEquals(GandivaTypes.GandivaType.BOOL_VALUE, f.getType().getType().getNumber());
+ }
+
+ @Test
+ public void testSchema() throws GandivaException {
+ Field a = Field.nullable("a", new ArrowType.Int(16, false));
+ Field b = Field.nullable("b", new ArrowType.Int(32, true));
+ Field c = Field.nullable("c", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
+
+ List<Field> fields = new ArrayList<Field>();
+ fields.add(a);
+ fields.add(b);
+ fields.add(c);
+
+ GandivaTypes.Schema schema = ArrowTypeHelper.arrowSchemaToProtobuf(new Schema(fields));
+ int idx = 0;
+ for (GandivaTypes.Field f : schema.getColumnsList()) {
+ assertEquals(fields.get(idx).getName(), f.getName());
+ idx++;
+ }
+ }
+}
diff --git a/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/TreeBuilderTest.java b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/TreeBuilderTest.java
new file mode 100644
index 000000000..90373cf79
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/TreeBuilderTest.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.expression;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.gandiva.exceptions.GandivaException;
+import org.apache.arrow.gandiva.ipc.GandivaTypes;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Test;
+
+public class TreeBuilderTest {
+
+ @Test
+ public void testMakeLiteral() throws GandivaException {
+ TreeNode n = TreeBuilder.makeLiteral(Boolean.TRUE);
+ GandivaTypes.TreeNode node = n.toProtobuf();
+
+ assertEquals(true, node.getBooleanNode().getValue());
+
+ n = TreeBuilder.makeLiteral(new Integer(10));
+ node = n.toProtobuf();
+ assertEquals(10, node.getIntNode().getValue());
+
+ n = TreeBuilder.makeLiteral(new Long(50));
+ node = n.toProtobuf();
+ assertEquals(50, node.getLongNode().getValue());
+
+ Float f = new Float(2.5);
+ n = TreeBuilder.makeLiteral(f);
+ node = n.toProtobuf();
+ assertEquals(f.floatValue(), node.getFloatNode().getValue(), 0.1);
+
+ Double d = new Double(3.3);
+ n = TreeBuilder.makeLiteral(d);
+ node = n.toProtobuf();
+ assertEquals(d.doubleValue(), node.getDoubleNode().getValue(), 0.1);
+
+ String s = new String("hello");
+ n = TreeBuilder.makeStringLiteral(s);
+ node = n.toProtobuf();
+ assertArrayEquals(s.getBytes(), node.getStringNode().getValue().toByteArray());
+
+ byte[] b = new String("hello").getBytes();
+ n = TreeBuilder.makeBinaryLiteral(b);
+ node = n.toProtobuf();
+ assertArrayEquals(b, node.getBinaryNode().getValue().toByteArray());
+ }
+
+ @Test
+ public void testMakeNull() throws GandivaException {
+ TreeNode n = TreeBuilder.makeNull(new ArrowType.Bool());
+ GandivaTypes.TreeNode node = n.toProtobuf();
+ assertEquals(
+ GandivaTypes.GandivaType.BOOL_VALUE, node.getNullNode().getType().getType().getNumber());
+
+ n = TreeBuilder.makeNull(new ArrowType.Int(32, true));
+ node = n.toProtobuf();
+ assertEquals(
+ GandivaTypes.GandivaType.INT32_VALUE, node.getNullNode().getType().getType().getNumber());
+
+ n = TreeBuilder.makeNull(new ArrowType.Int(64, false));
+ node = n.toProtobuf();
+ assertEquals(
+ GandivaTypes.GandivaType.UINT64_VALUE, node.getNullNode().getType().getType().getNumber());
+
+ n = TreeBuilder.makeNull(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
+ node = n.toProtobuf();
+ assertEquals(
+ GandivaTypes.GandivaType.FLOAT_VALUE, node.getNullNode().getType().getType().getNumber());
+ }
+
+ @Test
+ public void testMakeField() throws GandivaException {
+ TreeNode n = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Int(32, true)));
+ GandivaTypes.TreeNode node = n.toProtobuf();
+
+ assertEquals("a", node.getFieldNode().getField().getName());
+ assertEquals(
+ GandivaTypes.GandivaType.INT32_VALUE,
+ node.getFieldNode().getField().getType().getType().getNumber());
+ }
+
+ @Test
+ public void testMakeFunction() throws GandivaException {
+ TreeNode a = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Int(64, false)));
+ TreeNode b = TreeBuilder.makeField(Field.nullable("b", new ArrowType.Int(64, false)));
+ List<TreeNode> args = new ArrayList<TreeNode>(2);
+ args.add(a);
+ args.add(b);
+
+ TreeNode addNode = TreeBuilder.makeFunction("add", args, new ArrowType.Int(64, false));
+ GandivaTypes.TreeNode node = addNode.toProtobuf();
+
+ assertTrue(node.hasFnNode());
+ assertEquals("add", node.getFnNode().getFunctionName());
+ assertEquals("a", node.getFnNode().getInArgsList().get(0).getFieldNode().getField().getName());
+ assertEquals("b", node.getFnNode().getInArgsList().get(1).getFieldNode().getField().getName());
+ assertEquals(
+ GandivaTypes.GandivaType.UINT64_VALUE,
+ node.getFnNode().getReturnType().getType().getNumber());
+ }
+
+ @Test
+ public void testMakeIf() throws GandivaException {
+ Field a = Field.nullable("a", new ArrowType.Int(64, false));
+ Field b = Field.nullable("b", new ArrowType.Int(64, false));
+ TreeNode aNode = TreeBuilder.makeField(a);
+ TreeNode bNode = TreeBuilder.makeField(b);
+ List<TreeNode> args = new ArrayList<TreeNode>(2);
+ args.add(aNode);
+ args.add(bNode);
+
+ ArrowType retType = new ArrowType.Bool();
+ TreeNode cond = TreeBuilder.makeFunction("greater_than", args, retType);
+ TreeNode ifNode = TreeBuilder.makeIf(cond, aNode, bNode, retType);
+
+ GandivaTypes.TreeNode node = ifNode.toProtobuf();
+
+ assertTrue(node.hasIfNode());
+ assertEquals("greater_than", node.getIfNode().getCond().getFnNode().getFunctionName());
+ assertEquals(a.getName(), node.getIfNode().getThenNode().getFieldNode().getField().getName());
+ assertEquals(b.getName(), node.getIfNode().getElseNode().getFieldNode().getField().getName());
+ assertEquals(
+ GandivaTypes.GandivaType.BOOL_VALUE,
+ node.getIfNode().getReturnType().getType().getNumber());
+ }
+
+ @Test
+ public void testMakeAnd() throws GandivaException {
+ TreeNode a = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Bool()));
+ TreeNode b = TreeBuilder.makeField(Field.nullable("b", new ArrowType.Bool()));
+ List<TreeNode> args = new ArrayList<TreeNode>(2);
+ args.add(a);
+ args.add(b);
+
+ TreeNode andNode = TreeBuilder.makeAnd(args);
+ GandivaTypes.TreeNode node = andNode.toProtobuf();
+
+ assertTrue(node.hasAndNode());
+ assertEquals(2, node.getAndNode().getArgsList().size());
+ assertEquals("a", node.getAndNode().getArgsList().get(0).getFieldNode().getField().getName());
+ assertEquals("b", node.getAndNode().getArgsList().get(1).getFieldNode().getField().getName());
+ }
+
+ @Test
+ public void testMakeOr() throws GandivaException {
+ TreeNode a = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Bool()));
+ TreeNode b = TreeBuilder.makeField(Field.nullable("b", new ArrowType.Bool()));
+ List<TreeNode> args = new ArrayList<TreeNode>(2);
+ args.add(a);
+ args.add(b);
+
+ TreeNode orNode = TreeBuilder.makeOr(args);
+ GandivaTypes.TreeNode node = orNode.toProtobuf();
+
+ assertTrue(node.hasOrNode());
+ assertEquals(2, node.getOrNode().getArgsList().size());
+ assertEquals("a", node.getOrNode().getArgsList().get(0).getFieldNode().getField().getName());
+ assertEquals("b", node.getOrNode().getArgsList().get(1).getFieldNode().getField().getName());
+ }
+
+ @Test
+ public void testExpression() throws GandivaException {
+ Field a = Field.nullable("a", new ArrowType.Int(64, false));
+ Field b = Field.nullable("b", new ArrowType.Int(64, false));
+ TreeNode aNode = TreeBuilder.makeField(a);
+ TreeNode bNode = TreeBuilder.makeField(b);
+ List<TreeNode> args = new ArrayList<TreeNode>(2);
+ args.add(aNode);
+ args.add(bNode);
+
+ ArrowType retType = new ArrowType.Bool();
+ TreeNode cond = TreeBuilder.makeFunction("greater_than", args, retType);
+ TreeNode ifNode = TreeBuilder.makeIf(cond, aNode, bNode, retType);
+
+ ExpressionTree expr = TreeBuilder.makeExpression(ifNode, Field.nullable("c", retType));
+
+ GandivaTypes.ExpressionRoot root = expr.toProtobuf();
+
+ assertTrue(root.getRoot().hasIfNode());
+ assertEquals(
+ "greater_than", root.getRoot().getIfNode().getCond().getFnNode().getFunctionName());
+ assertEquals("c", root.getResultType().getName());
+ assertEquals(
+ GandivaTypes.GandivaType.BOOL_VALUE, root.getResultType().getType().getType().getNumber());
+ }
+
+ @Test
+ public void testExpression2() throws GandivaException {
+ Field a = Field.nullable("a", new ArrowType.Int(64, false));
+ Field b = Field.nullable("b", new ArrowType.Int(64, false));
+ List<Field> args = new ArrayList<Field>(2);
+ args.add(a);
+ args.add(b);
+
+ Field c = Field.nullable("c", new ArrowType.Int(64, false));
+ ExpressionTree expr = TreeBuilder.makeExpression("add", args, c);
+ GandivaTypes.ExpressionRoot root = expr.toProtobuf();
+
+ GandivaTypes.TreeNode node = root.getRoot();
+
+ assertEquals("c", root.getResultType().getName());
+ assertTrue(node.hasFnNode());
+ assertEquals("add", node.getFnNode().getFunctionName());
+ assertEquals("a", node.getFnNode().getInArgsList().get(0).getFieldNode().getField().getName());
+ assertEquals("b", node.getFnNode().getInArgsList().get(1).getFieldNode().getField().getName());
+ assertEquals(
+ GandivaTypes.GandivaType.UINT64_VALUE,
+ node.getFnNode().getReturnType().getType().getNumber());
+ }
+
+ @Test
+ public void testExpressionWithAnd() throws GandivaException {
+ TreeNode a = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Bool()));
+ TreeNode b = TreeBuilder.makeField(Field.nullable("b", new ArrowType.Bool()));
+ List<TreeNode> args = new ArrayList<TreeNode>(2);
+ args.add(a);
+ args.add(b);
+
+ TreeNode andNode = TreeBuilder.makeAnd(args);
+ ExpressionTree expr =
+ TreeBuilder.makeExpression(andNode, Field.nullable("c", new ArrowType.Bool()));
+ GandivaTypes.ExpressionRoot root = expr.toProtobuf();
+
+ assertTrue(root.getRoot().hasAndNode());
+ assertEquals(
+ "a", root.getRoot().getAndNode().getArgsList().get(0).getFieldNode().getField().getName());
+ assertEquals(
+ "b", root.getRoot().getAndNode().getArgsList().get(1).getFieldNode().getField().getName());
+ assertEquals("c", root.getResultType().getName());
+ assertEquals(
+ GandivaTypes.GandivaType.BOOL_VALUE, root.getResultType().getType().getType().getNumber());
+ }
+
+ @Test
+ public void testExpressionWithOr() throws GandivaException {
+ TreeNode a = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Bool()));
+ TreeNode b = TreeBuilder.makeField(Field.nullable("b", new ArrowType.Bool()));
+ List<TreeNode> args = new ArrayList<TreeNode>(2);
+ args.add(a);
+ args.add(b);
+
+ TreeNode orNode = TreeBuilder.makeOr(args);
+ ExpressionTree expr =
+ TreeBuilder.makeExpression(orNode, Field.nullable("c", new ArrowType.Bool()));
+ GandivaTypes.ExpressionRoot root = expr.toProtobuf();
+
+ assertTrue(root.getRoot().hasOrNode());
+ assertEquals(
+ "a", root.getRoot().getOrNode().getArgsList().get(0).getFieldNode().getField().getName());
+ assertEquals(
+ "b", root.getRoot().getOrNode().getArgsList().get(1).getFieldNode().getField().getName());
+ assertEquals("c", root.getResultType().getName());
+ assertEquals(
+ GandivaTypes.GandivaType.BOOL_VALUE, root.getResultType().getType().getType().getNumber());
+ }
+
+ @Test
+ public void testCondition() throws GandivaException {
+ Field a = Field.nullable("a", new ArrowType.Int(64, false));
+ Field b = Field.nullable("b", new ArrowType.Int(64, false));
+
+ TreeNode aNode = TreeBuilder.makeField(a);
+ TreeNode bNode = TreeBuilder.makeField(b);
+ List<TreeNode> args = new ArrayList<TreeNode>(2);
+ args.add(aNode);
+ args.add(bNode);
+
+ TreeNode root = TreeBuilder.makeFunction("greater_than", args, new ArrowType.Bool());
+ Condition condition = TreeBuilder.makeCondition(root);
+
+ GandivaTypes.Condition conditionProto = condition.toProtobuf();
+ assertTrue(conditionProto.getRoot().hasFnNode());
+ assertEquals("greater_than", conditionProto.getRoot().getFnNode().getFunctionName());
+ assertEquals(
+ "a",
+ conditionProto
+ .getRoot()
+ .getFnNode()
+ .getInArgsList()
+ .get(0)
+ .getFieldNode()
+ .getField()
+ .getName());
+ assertEquals(
+ "b",
+ conditionProto
+ .getRoot()
+ .getFnNode()
+ .getInArgsList()
+ .get(1)
+ .getFieldNode()
+ .getField()
+ .getName());
+ }
+
+ @Test
+ public void testCondition2() throws GandivaException {
+ Field a = Field.nullable("a", new ArrowType.Int(64, false));
+ Field b = Field.nullable("b", new ArrowType.Int(64, false));
+
+ Condition condition = TreeBuilder.makeCondition("greater_than", Arrays.asList(a, b));
+
+ GandivaTypes.Condition conditionProto = condition.toProtobuf();
+ assertTrue(conditionProto.getRoot().hasFnNode());
+ assertEquals("greater_than", conditionProto.getRoot().getFnNode().getFunctionName());
+ assertEquals(
+ "a",
+ conditionProto
+ .getRoot()
+ .getFnNode()
+ .getInArgsList()
+ .get(0)
+ .getFieldNode()
+ .getField()
+ .getName());
+ assertEquals(
+ "b",
+ conditionProto
+ .getRoot()
+ .getFnNode()
+ .getInArgsList()
+ .get(1)
+ .getFieldNode()
+ .getField()
+ .getName());
+ }
+}
diff --git a/src/arrow/java/gandiva/src/test/resources/logback.xml b/src/arrow/java/gandiva/src/test/resources/logback.xml
new file mode 100644
index 000000000..f9e449fa6
--- /dev/null
+++ b/src/arrow/java/gandiva/src/test/resources/logback.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<configuration>
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <!-- encoders are assigned the type
+ ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+ </encoder>
+ </appender>
+
+ <statusListener class="ch.qos.logback.core.status.NopStatusListener"/>
+ <logger name="org.apache.arrow" additivity="false">
+ <level value="info" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+</configuration>
diff --git a/src/arrow/java/memory/memory-core/pom.xml b/src/arrow/java/memory/memory-core/pom.xml
new file mode 100644
index 000000000..eb1226a9e
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/pom.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>arrow-memory</artifactId>
+ <groupId>org.apache.arrow</groupId>
+ <version>6.0.1</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>arrow-memory-core</artifactId>
+
+ <name>Arrow Memory - Core</name>
+ <description>Core off-heap memory management libraries for Arrow ValueVectors.</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.immutables</groupId>
+ <artifactId>value</artifactId>
+ </dependency>
+ </dependencies>
+
+</project>
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java
new file mode 100644
index 000000000..42dac7b8c
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java
@@ -0,0 +1,308 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+import javax.annotation.concurrent.ThreadSafe;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Provides a concurrent way to manage account for memory usage without locking. Used as basis
+ * for Allocators. All
+ * operations are threadsafe (except for close).
+ */
+@ThreadSafe
+class Accountant implements AutoCloseable {
+
+ /**
+ * The parent allocator.
+ */
+ protected final Accountant parent;
+
+ private final String name;
+
+ /**
+ * The amount of memory reserved for this allocator. Releases below this amount of memory will
+ * not be returned to the
+ * parent Accountant until this Accountant is closed.
+ */
+ protected final long reservation;
+
+ private final AtomicLong peakAllocation = new AtomicLong();
+
+ /**
+ * Maximum local memory that can be held. This can be externally updated. Changing it won't
+ * cause past memory to
+ * change but will change responses to future allocation efforts
+ */
+ private final AtomicLong allocationLimit = new AtomicLong();
+
+ /**
+ * Currently allocated amount of memory.
+ */
+ private final AtomicLong locallyHeldMemory = new AtomicLong();
+
+ public Accountant(Accountant parent, String name, long reservation, long maxAllocation) {
+ Preconditions.checkNotNull(name, "name must not be null");
+ Preconditions.checkArgument(reservation >= 0, "The initial reservation size must be non-negative.");
+ Preconditions.checkArgument(maxAllocation >= 0, "The maximum allocation limit must be non-negative.");
+ Preconditions.checkArgument(reservation <= maxAllocation,
+ "The initial reservation size must be <= the maximum allocation.");
+ Preconditions.checkArgument(reservation == 0 || parent != null, "The root accountant can't reserve memory.");
+
+ this.parent = parent;
+ this.name = name;
+ this.reservation = reservation;
+ this.allocationLimit.set(maxAllocation);
+
+ if (reservation != 0) {
+ // we will allocate a reservation from our parent.
+ final AllocationOutcome outcome = parent.allocateBytes(reservation);
+ if (!outcome.isOk()) {
+ throw new OutOfMemoryException(String.format(
+ "Failure trying to allocate initial reservation for Allocator. " +
+ "Attempted to allocate %d bytes.", reservation), outcome.getDetails());
+ }
+ }
+ }
+
+ /**
+ * Attempt to allocate the requested amount of memory. Either completely succeeds or completely
+ * fails. If it fails, no changes are made to accounting.
+ *
+ * @param size The amount of memory to reserve in bytes.
+ * @return the status and details of allocation at each allocator in the chain.
+ */
+ AllocationOutcome allocateBytes(long size) {
+ AllocationOutcome.Status status = allocateBytesInternal(size);
+ if (status.isOk()) {
+ return AllocationOutcome.SUCCESS_INSTANCE;
+ } else {
+ // Try again, but with details this time.
+ // Populating details only on failures avoids performance overhead in the common case (success case).
+ AllocationOutcomeDetails details = new AllocationOutcomeDetails();
+ status = allocateBytesInternal(size, details);
+ return new AllocationOutcome(status, details);
+ }
+ }
+
+ private AllocationOutcome.Status allocateBytesInternal(long size, AllocationOutcomeDetails details) {
+ final AllocationOutcome.Status status = allocate(size,
+ true /*incomingUpdatePeek*/, false /*forceAllocation*/, details);
+ if (!status.isOk()) {
+ releaseBytes(size);
+ }
+ return status;
+ }
+
+ private AllocationOutcome.Status allocateBytesInternal(long size) {
+ return allocateBytesInternal(size, null /*details*/);
+ }
+
+ private void updatePeak() {
+ final long currentMemory = locallyHeldMemory.get();
+ while (true) {
+
+ final long previousPeak = peakAllocation.get();
+ if (currentMemory > previousPeak) {
+ if (!peakAllocation.compareAndSet(previousPeak, currentMemory)) {
+ // peak allocation changed underneath us. try again.
+ continue;
+ }
+ }
+
+ // we either succeeded to set peak allocation or we weren't above the previous peak, exit.
+ return;
+ }
+ }
+
+
+ /**
+ * Increase the accounting. Returns whether the allocation fit within limits.
+ *
+ * @param size to increase
+ * @return Whether the allocation fit within limits.
+ */
+ public boolean forceAllocate(long size) {
+ final AllocationOutcome.Status outcome = allocate(size, true, true, null);
+ return outcome.isOk();
+ }
+
+ /**
+ * Internal method for allocation. This takes a forced approach to allocation to ensure that we
+ * manage reservation
+ * boundary issues consistently. Allocation is always done through the entire tree. The two
+ * options that we influence
+ * are whether the allocation should be forced and whether or not the peak memory allocation
+ * should be updated. If at
+ * some point during allocation escalation we determine that the allocation is no longer
+ * possible, we will continue to
+ * do a complete and consistent allocation but we will stop updating the peak allocation. We do
+ * this because we know
+ * that we will be directly unwinding this allocation (and thus never actually making the
+ * allocation). If force
+ * allocation is passed, then we continue to update the peak limits since we now know that this
+ * allocation will occur
+ * despite our moving past one or more limits.
+ *
+ * @param size The size of the allocation.
+ * @param incomingUpdatePeak Whether we should update the local peak for this allocation.
+ * @param forceAllocation Whether we should force the allocation.
+ * @return The outcome of the allocation.
+ */
+ private AllocationOutcome.Status allocate(final long size, final boolean incomingUpdatePeak,
+ final boolean forceAllocation, AllocationOutcomeDetails details) {
+ final long newLocal = locallyHeldMemory.addAndGet(size);
+ final long beyondReservation = newLocal - reservation;
+ final boolean beyondLimit = newLocal > allocationLimit.get();
+ final boolean updatePeak = forceAllocation || (incomingUpdatePeak && !beyondLimit);
+
+ if (details != null) {
+ // Add details if required (used in exceptions and debugging).
+ boolean allocationFailed = true;
+ long allocatedLocal = 0;
+ if (!beyondLimit) {
+ allocatedLocal = size - Math.min(beyondReservation, size);
+ allocationFailed = false;
+ }
+ details.pushEntry(this, newLocal - size, size, allocatedLocal, allocationFailed);
+ }
+
+ AllocationOutcome.Status parentOutcome = AllocationOutcome.Status.SUCCESS;
+ if (beyondReservation > 0 && parent != null) {
+ // we need to get memory from our parent.
+ final long parentRequest = Math.min(beyondReservation, size);
+ parentOutcome = parent.allocate(parentRequest, updatePeak, forceAllocation, details);
+ }
+
+ final AllocationOutcome.Status finalOutcome;
+ if (beyondLimit) {
+ finalOutcome = AllocationOutcome.Status.FAILED_LOCAL;
+ } else {
+ finalOutcome = parentOutcome.isOk() ? AllocationOutcome.Status.SUCCESS
+ : AllocationOutcome.Status.FAILED_PARENT;
+ }
+
+ if (updatePeak) {
+ updatePeak();
+ }
+
+ return finalOutcome;
+ }
+
+ public void releaseBytes(long size) {
+ // reduce local memory. all memory released above reservation should be released up the tree.
+ final long newSize = locallyHeldMemory.addAndGet(-size);
+
+ Preconditions.checkArgument(newSize >= 0, "Accounted size went negative.");
+
+ final long originalSize = newSize + size;
+ if (originalSize > reservation && parent != null) {
+ // we deallocated memory that we should release to our parent.
+ final long possibleAmountToReleaseToParent = originalSize - reservation;
+ final long actualToReleaseToParent = Math.min(size, possibleAmountToReleaseToParent);
+ parent.releaseBytes(actualToReleaseToParent);
+ }
+ }
+
+ public boolean isOverLimit() {
+ return getAllocatedMemory() > getLimit() || (parent != null && parent.isOverLimit());
+ }
+
+ /**
+ * Close this Accountant. This will release any reservation bytes back to a parent Accountant.
+ */
+ @Override
+ public void close() {
+ // return memory reservation to parent allocator.
+ if (parent != null) {
+ parent.releaseBytes(reservation);
+ }
+ }
+
+ /**
+ * Return the name of the accountant.
+ *
+ * @return name of accountant
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Return the current limit of this Accountant.
+ *
+ * @return Limit in bytes.
+ */
+ public long getLimit() {
+ return allocationLimit.get();
+ }
+
+ /**
+ * Return the initial reservation.
+ *
+ * @return reservation in bytes.
+ */
+ public long getInitReservation() {
+ return reservation;
+ }
+
+ /**
+ * Set the maximum amount of memory that can be allocated in the this Accountant before failing
+ * an allocation.
+ *
+ * @param newLimit The limit in bytes.
+ */
+ public void setLimit(long newLimit) {
+ allocationLimit.set(newLimit);
+ }
+
+ /**
+ * Return the current amount of allocated memory that this Accountant is managing accounting
+ * for. Note this does not
+ * include reservation memory that hasn't been allocated.
+ *
+ * @return Currently allocate memory in bytes.
+ */
+ public long getAllocatedMemory() {
+ return locallyHeldMemory.get();
+ }
+
+ /**
+ * The peak memory allocated by this Accountant.
+ *
+ * @return The peak allocated memory in bytes.
+ */
+ public long getPeakMemoryAllocation() {
+ return peakAllocation.get();
+ }
+
+ public long getHeadroom() {
+ long localHeadroom = allocationLimit.get() - locallyHeldMemory.get();
+ if (parent == null) {
+ return localHeadroom;
+ }
+
+ // Amount of reserved memory left on top of what parent has
+ long reservedHeadroom = Math.max(0, reservation - locallyHeldMemory.get());
+ return Math.min(localHeadroom, parent.getHeadroom() + reservedHeadroom);
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java
new file mode 100644
index 000000000..ff2b25dfa
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * An allocation listener being notified for allocation/deallocation
+ *
+ * <p>It might be called from multiple threads if the allocator hierarchy shares a listener, in which
+ * case, the provider should take care of making the implementation thread-safe.
+ */
+public interface AllocationListener {
+
+ AllocationListener NOOP = new AllocationListener() {};
+
+ /**
+ * Called each time a new buffer has been requested.
+ *
+ * <p>An exception can be safely thrown by this method to terminate the allocation.
+ *
+ * @param size the buffer size being allocated
+ */
+ default void onPreAllocation(long size) {}
+
+ /**
+ * Called each time a new buffer has been allocated.
+ *
+ * <p>An exception cannot be thrown by this method.
+ *
+ * @param size the buffer size being allocated
+ */
+ default void onAllocation(long size) {}
+
+ /**
+ * Informed each time a buffer is released from allocation.
+ *
+ * <p>An exception cannot be thrown by this method.
+ * @param size The size of the buffer being released.
+ */
+ default void onRelease(long size) {}
+
+
+ /**
+ * Called whenever an allocation failed, giving the caller a chance to create some space in the
+ * allocator (either by freeing some resource, or by changing the limit), and, if successful,
+ * allowing the allocator to retry the allocation.
+ *
+ * @param size the buffer size that was being allocated
+ * @param outcome the outcome of the failed allocation. Carries information of what failed
+ * @return true, if the allocation can be retried; false if the allocation should fail
+ */
+ default boolean onFailedAllocation(long size, AllocationOutcome outcome) {
+ return false;
+ }
+
+ /**
+ * Called immediately after a child allocator was added to the parent allocator.
+ *
+ * @param parentAllocator The parent allocator to which a child was added
+ * @param childAllocator The child allocator that was just added
+ */
+ default void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) {}
+
+ /**
+ * Called immediately after a child allocator was removed from the parent allocator.
+ *
+ * @param parentAllocator The parent allocator from which a child was removed
+ * @param childAllocator The child allocator that was just removed
+ */
+ default void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) {}
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationManager.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationManager.java
new file mode 100644
index 000000000..5f8ab1244
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationManager.java
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * The abstract base class of AllocationManager.
+ *
+ * <p>Manages the relationship between one or more allocators and a particular UDLE. Ensures that
+ * one allocator owns the
+ * memory that multiple allocators may be referencing. Manages a BufferLedger between each of its
+ * associated allocators.
+ *
+ * <p>The only reason that this isn't package private is we're forced to put ArrowBuf in Netty's
+ * package which need access
+ * to these objects or methods.
+ *
+ * <p>Threading: AllocationManager manages thread-safety internally. Operations within the context
+ * of a single BufferLedger
+ * are lockless in nature and can be leveraged by multiple threads. Operations that cross the
+ * context of two ledgers
+ * will acquire a lock on the AllocationManager instance. Important note, there is one
+ * AllocationManager per
+ * UnsafeDirectLittleEndian buffer allocation. As such, there will be thousands of these in a
+ * typical query. The
+ * contention of acquiring a lock on AllocationManager should be very low.
+ */
+public abstract class AllocationManager {
+
+ private static final AtomicLong MANAGER_ID_GENERATOR = new AtomicLong(0);
+
+ private final BufferAllocator root;
+ private final long allocatorManagerId = MANAGER_ID_GENERATOR.incrementAndGet();
+ // ARROW-1627 Trying to minimize memory overhead caused by previously used IdentityHashMap
+ // see JIRA for details
+ private final LowCostIdentityHashMap<BufferAllocator, BufferLedger> map = new LowCostIdentityHashMap<>();
+ private final long amCreationTime = System.nanoTime();
+
+ // The ReferenceManager created at the time of creation of this AllocationManager
+ // is treated as the owning reference manager for the underlying chunk of memory
+ // managed by this allocation manager
+ private volatile BufferLedger owningLedger;
+ private volatile long amDestructionTime = 0;
+
+ protected AllocationManager(BufferAllocator accountingAllocator) {
+ Preconditions.checkNotNull(accountingAllocator);
+ accountingAllocator.assertOpen();
+
+ this.root = accountingAllocator.getRoot();
+
+ // we do a no retain association since our creator will want to retrieve the newly created
+ // ledger and will create a reference count at that point
+ this.owningLedger = associate(accountingAllocator, false);
+ }
+
+ BufferLedger getOwningLedger() {
+ return owningLedger;
+ }
+
+ void setOwningLedger(final BufferLedger ledger) {
+ this.owningLedger = ledger;
+ }
+
+ /**
+ * Associate the existing underlying buffer with a new allocator. This will increase the
+ * reference count on the corresponding buffer ledger by 1
+ *
+ * @param allocator The target allocator to associate this buffer with.
+ * @return The reference manager (new or existing) that associates the underlying
+ * buffer to this new ledger.
+ */
+ BufferLedger associate(final BufferAllocator allocator) {
+ return associate(allocator, true);
+ }
+
+ private BufferLedger associate(final BufferAllocator allocator, final boolean retain) {
+ allocator.assertOpen();
+ Preconditions.checkState(root == allocator.getRoot(),
+ "A buffer can only be associated between two allocators that share the same root");
+
+ synchronized (this) {
+ BufferLedger ledger = map.get(allocator);
+ if (ledger != null) {
+ if (retain) {
+ // bump the ref count for the ledger
+ ledger.increment();
+ }
+ return ledger;
+ }
+
+ ledger = new BufferLedger(allocator, this);
+
+ if (retain) {
+ // the new reference manager will have a ref count of 1
+ ledger.increment();
+ }
+
+ // store the mapping for <allocator, reference manager>
+ BufferLedger oldLedger = map.put(ledger);
+ Preconditions.checkState(oldLedger == null,
+ "Detected inconsistent state: A reference manager already exists for this allocator");
+
+ if (allocator instanceof BaseAllocator) {
+ // needed for debugging only: keep a pointer to reference manager inside allocator
+ // to dump state, verify allocator state etc
+ ((BaseAllocator) allocator).associateLedger(ledger);
+ }
+ return ledger;
+ }
+ }
+
+ /**
+ * The way that a particular ReferenceManager (BufferLedger) communicates back to the
+ * AllocationManager that it no longer needs to hold a reference to a particular
+ * piece of memory. Reference manager needs to hold a lock to invoke this method
+ * It is called when the shared refcount of all the ArrowBufs managed by the
+ * calling ReferenceManager drops to 0.
+ */
+ void release(final BufferLedger ledger) {
+ final BufferAllocator allocator = ledger.getAllocator();
+ allocator.assertOpen();
+
+ // remove the <BaseAllocator, BufferLedger> mapping for the allocator
+ // of calling BufferLedger
+ Preconditions.checkState(map.containsKey(allocator),
+ "Expecting a mapping for allocator and reference manager");
+ final BufferLedger oldLedger = map.remove(allocator);
+
+ BufferAllocator oldAllocator = oldLedger.getAllocator();
+ if (oldAllocator instanceof BaseAllocator) {
+ // needed for debug only: tell the allocator that AllocationManager is removing a
+ // reference manager associated with this particular allocator
+ ((BaseAllocator) oldAllocator).dissociateLedger(oldLedger);
+ }
+
+ if (oldLedger == owningLedger) {
+ // the release call was made by the owning reference manager
+ if (map.isEmpty()) {
+ // the only <allocator, reference manager> mapping was for the owner
+ // which now has been removed, it implies we can safely destroy the
+ // underlying memory chunk as it is no longer being referenced
+ oldAllocator.releaseBytes(getSize());
+ // free the memory chunk associated with the allocation manager
+ release0();
+ oldAllocator.getListener().onRelease(getSize());
+ amDestructionTime = System.nanoTime();
+ owningLedger = null;
+ } else {
+ // since the refcount dropped to 0 for the owning reference manager and allocation
+ // manager will no longer keep a mapping for it, we need to change the owning
+ // reference manager to whatever the next available <allocator, reference manager>
+ // mapping exists.
+ BufferLedger newOwningLedger = map.getNextValue();
+ // we'll forcefully transfer the ownership and not worry about whether we
+ // exceeded the limit since this consumer can't do anything with this.
+ oldLedger.transferBalance(newOwningLedger);
+ }
+ } else {
+ // the release call was made by a non-owning reference manager, so after remove there have
+ // to be 1 or more <allocator, reference manager> mappings
+ Preconditions.checkState(map.size() > 0,
+ "The final removal of reference manager should be connected to owning reference manager");
+ }
+ }
+
+ /**
+ * Return the size of underlying chunk of memory managed by this Allocation Manager.
+ *
+ * <p>The underlying memory chunk managed can be different from the original requested size.
+ *
+ * @return size of underlying memory chunk
+ */
+ public abstract long getSize();
+
+ /**
+ * Return the absolute memory address pointing to the fist byte of underlying memory chunk.
+ */
+ protected abstract long memoryAddress();
+
+ /**
+ * Release the underlying memory chunk.
+ */
+ protected abstract void release0();
+
+ /**
+ * A factory interface for creating {@link AllocationManager}.
+ * One may extend this interface to use a user-defined AllocationManager implementation.
+ */
+ public interface Factory {
+
+ /**
+ * Create an {@link AllocationManager}.
+ *
+ * @param accountingAllocator The allocator that are expected to be associated with newly created AllocationManager.
+ * Currently it is always equivalent to "this"
+ * @param size Size (in bytes) of memory managed by the AllocationManager
+ * @return The created AllocationManager used by this allocator
+ */
+ AllocationManager create(BufferAllocator accountingAllocator, long size);
+
+ ArrowBuf empty();
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcome.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcome.java
new file mode 100644
index 000000000..2977775e6
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcome.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.Optional;
+
+/**
+ * Describes the type of outcome that occurred when trying to account for allocation of memory.
+ */
+public class AllocationOutcome {
+ private final Status status;
+ private final AllocationOutcomeDetails details;
+ static final AllocationOutcome SUCCESS_INSTANCE = new AllocationOutcome(Status.SUCCESS);
+
+ AllocationOutcome(Status status, AllocationOutcomeDetails details) {
+ this.status = status;
+ this.details = details;
+ }
+
+ AllocationOutcome(Status status) {
+ this(status, null);
+ }
+
+ /**
+ * Get the status of the allocation.
+ * @return status code.
+ */
+ public Status getStatus() {
+ return status;
+ }
+
+ /**
+ * Get additional details of the allocation (like the status at each allocator in the hierarchy).
+ * @return details of allocation
+ */
+ public Optional<AllocationOutcomeDetails> getDetails() {
+ return Optional.ofNullable(details);
+ }
+
+ /**
+ * Returns true if the allocation was a success.
+ * @return true if allocation was successful, false otherwise.
+ */
+ public boolean isOk() {
+ return status.isOk();
+ }
+
+ /**
+ * Allocation status code.
+ */
+ public enum Status {
+ /**
+ * Allocation succeeded.
+ */
+ SUCCESS(true),
+
+ /**
+ * Allocation succeeded but only because the allocator was forced to move beyond a limit.
+ */
+ FORCED_SUCCESS(true),
+
+ /**
+ * Allocation failed because the local allocator's limits were exceeded.
+ */
+ FAILED_LOCAL(false),
+
+ /**
+ * Allocation failed because a parent allocator's limits were exceeded.
+ */
+ FAILED_PARENT(false);
+
+ private final boolean ok;
+
+ Status(boolean ok) {
+ this.ok = ok;
+ }
+
+ public boolean isOk() {
+ return ok;
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java
new file mode 100644
index 000000000..6499ce84b
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+
+/**
+ * Captures details of allocation for each accountant in the hierarchical chain.
+ */
+public class AllocationOutcomeDetails {
+ Deque<Entry> allocEntries;
+
+ AllocationOutcomeDetails() {
+ allocEntries = new ArrayDeque<>();
+ }
+
+ void pushEntry(Accountant accountant, long totalUsedBeforeAllocation, long requestedSize,
+ long allocatedSize, boolean allocationFailed) {
+
+ Entry top = allocEntries.peekLast();
+ if (top != null && top.allocationFailed) {
+ // if the allocation has already failed, stop saving the entries.
+ return;
+ }
+
+ allocEntries.addLast(new Entry(accountant, totalUsedBeforeAllocation, requestedSize,
+ allocatedSize, allocationFailed));
+ }
+
+ /**
+ * Get the allocator that caused the failure.
+ * @return the allocator that caused failure, null if there was no failure.
+ */
+ public BufferAllocator getFailedAllocator() {
+ Entry top = allocEntries.peekLast();
+ if (top != null && top.allocationFailed && (top.accountant instanceof BufferAllocator)) {
+ return (BufferAllocator) top.accountant;
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("Allocation outcome details:\n");
+ allocEntries.forEach(sb::append);
+ return sb.toString();
+ }
+
+ /**
+ * Outcome of the allocation request at one accountant in the hierarchy.
+ */
+ public static class Entry {
+ private final Accountant accountant;
+
+ // Remember allocator attributes at the time of the request.
+ private final long limit;
+ private final long used;
+
+ // allocation outcome
+ private final long requestedSize;
+ private final long allocatedSize;
+ private final boolean allocationFailed;
+
+ Entry(Accountant accountant, long totalUsedBeforeAllocation, long requestedSize,
+ long allocatedSize, boolean allocationFailed) {
+ this.accountant = accountant;
+ this.limit = accountant.getLimit();
+ this.used = totalUsedBeforeAllocation;
+
+ this.requestedSize = requestedSize;
+ this.allocatedSize = allocatedSize;
+ this.allocationFailed = allocationFailed;
+ }
+
+ public Accountant getAccountant() {
+ return accountant;
+ }
+
+ public long getLimit() {
+ return limit;
+ }
+
+ public long getUsed() {
+ return used;
+ }
+
+ public long getRequestedSize() {
+ return requestedSize;
+ }
+
+ public long getAllocatedSize() {
+ return allocatedSize;
+ }
+
+ public boolean isAllocationFailed() {
+ return allocationFailed;
+ }
+
+ @Override
+ public String toString() {
+ return new StringBuilder()
+ .append("allocator[" + accountant.getName() + "]")
+ .append(" reservation: " + accountant.getInitReservation())
+ .append(" limit: " + limit)
+ .append(" used: " + used)
+ .append(" requestedSize: " + requestedSize)
+ .append(" allocatedSize: " + allocatedSize)
+ .append(" localAllocationStatus: " + (allocationFailed ? "fail" : "success"))
+ .append("\n")
+ .toString();
+ }
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java
new file mode 100644
index 000000000..4331eb20c
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * Supports cumulative allocation reservation. Clients may increase the size of the reservation
+ * repeatedly until they
+ * call for an allocation of the current total size. The reservation can only be used once, and
+ * will throw an exception
+ * if it is used more than once.
+ * <p>
+ * For the purposes of airtight memory accounting, the reservation must be close()d whether it is
+ * used or not.
+ * This is not threadsafe.
+ * </p>
+ */
+public interface AllocationReservation extends AutoCloseable {
+
+ /**
+ * Add to the current reservation.
+ *
+ * <p>Adding may fail if the allocator is not allowed to consume any more space.</p>
+ *
+ * @param nBytes the number of bytes to add
+ * @return true if the addition is possible, false otherwise
+ * @throws IllegalStateException if called after buffer() is used to allocate the reservation
+ */
+ boolean add(int nBytes);
+
+ /**
+ * Requests a reservation of additional space.
+ *
+ * <p>The implementation of the allocator's inner class provides this.</p>
+ *
+ * @param nBytes the amount to reserve
+ * @return true if the reservation can be satisfied, false otherwise
+ */
+ boolean reserve(int nBytes);
+
+ /**
+ * Allocate a buffer whose size is the total of all the add()s made.
+ *
+ * <p>The allocation request can still fail, even if the amount of space
+ * requested is available, if the allocation cannot be made contiguously.</p>
+ *
+ * @return the buffer, or null, if the request cannot be satisfied
+ * @throws IllegalStateException if called called more than once
+ */
+ ArrowBuf allocateBuffer();
+
+ /**
+ * Get the current size of the reservation (the sum of all the add()s).
+ *
+ * @return size of the current reservation
+ */
+ int getSize();
+
+ /**
+ * Return whether or not the reservation has been used.
+ *
+ * @return whether or not the reservation has been used
+ */
+ boolean isUsed();
+
+ /**
+ * Return whether or not the reservation has been closed.
+ *
+ * @return whether or not the reservation has been closed
+ */
+ boolean isClosed();
+
+ void close();
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocatorClosedException.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocatorClosedException.java
new file mode 100644
index 000000000..39c2ef82e
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocatorClosedException.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * Exception thrown when a closed BufferAllocator is used. Note
+ * this is an unchecked exception.
+ */
+@SuppressWarnings("serial")
+public class AllocatorClosedException extends RuntimeException {
+
+ /**
+ * Constructs a new allocator closed exception with a given message.
+ *
+ * @param message string associated with the cause
+ */
+ public AllocatorClosedException(String message) {
+ super(message);
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java
new file mode 100644
index 000000000..d7827073e
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java
@@ -0,0 +1,1202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.ReadOnlyBufferException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.arrow.memory.BaseAllocator.Verbosity;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.HistoricalLog;
+import org.apache.arrow.memory.util.MemoryUtil;
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * ArrowBuf serves as a facade over underlying memory by providing
+ * several access APIs to read/write data into a chunk of direct
+ * memory. All the accounting, ownership and reference management
+ * is done by {@link ReferenceManager} and ArrowBuf can work
+ * with a custom user provided implementation of ReferenceManager
+ * <p>
+ * Two important instance variables of an ArrowBuf:
+ * (1) address - starting virtual address in the underlying memory
+ * chunk that this ArrowBuf has access to
+ * (2) length - length (in bytes) in the underlying memory chunk
+ * that this ArrowBuf has access to
+ * </p>
+ * <p>
+ * The management (allocation, deallocation, reference counting etc) for
+ * the memory chunk is not done by ArrowBuf.
+ * Default implementation of ReferenceManager, allocation is in
+ * {@link BaseAllocator}, {@link BufferLedger} and {@link AllocationManager}
+ * </p>
+ */
+public final class ArrowBuf implements AutoCloseable {
+
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ArrowBuf.class);
+
+ private static final int SHORT_SIZE = Short.BYTES;
+ private static final int INT_SIZE = Integer.BYTES;
+ private static final int FLOAT_SIZE = Float.BYTES;
+ private static final int DOUBLE_SIZE = Double.BYTES;
+ private static final int LONG_SIZE = Long.BYTES;
+
+ private static final AtomicLong idGenerator = new AtomicLong(0);
+ private static final int LOG_BYTES_PER_ROW = 10;
+ private final long id = idGenerator.incrementAndGet();
+ private final ReferenceManager referenceManager;
+ private final BufferManager bufferManager;
+ private final long addr;
+ private long readerIndex;
+ private long writerIndex;
+ private final HistoricalLog historicalLog = BaseAllocator.DEBUG ?
+ new HistoricalLog(BaseAllocator.DEBUG_LOG_LENGTH, "ArrowBuf[%d]", id) : null;
+ private volatile long length;
+
+ /**
+ * Constructs a new ArrowBuf.
+ *
+ * @param referenceManager The memory manager to track memory usage and reference count of this buffer
+ * @param length The byte length of this buffer
+ */
+ public ArrowBuf(
+ final ReferenceManager referenceManager,
+ final BufferManager bufferManager,
+ final long length,
+ final long memoryAddress) {
+ this.referenceManager = referenceManager;
+ this.bufferManager = bufferManager;
+ this.addr = memoryAddress;
+ this.length = length;
+ this.readerIndex = 0;
+ this.writerIndex = 0;
+ if (BaseAllocator.DEBUG) {
+ historicalLog.recordEvent("create()");
+ }
+ }
+
+ public int refCnt() {
+ return referenceManager.getRefCount();
+ }
+
+ /**
+ * Allows a function to determine whether not reading a particular string of bytes is valid.
+ *
+ * <p>Will throw an exception if the memory is not readable for some reason. Only doesn't
+ * something in the case that
+ * AssertionUtil.BOUNDS_CHECKING_ENABLED is true.
+ *
+ * @param start The starting position of the bytes to be read.
+ * @param end The exclusive endpoint of the bytes to be read.
+ */
+ public void checkBytes(long start, long end) {
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ checkIndexD(start, end - start);
+ }
+ }
+
+ /**
+ * For get/set operations, reference count should be >= 1.
+ */
+ private void ensureAccessible() {
+ if (this.refCnt() == 0) {
+ throw new IllegalStateException("Ref count should be >= 1 for accessing the ArrowBuf");
+ }
+ }
+
+ /**
+ * Get reference manager for this ArrowBuf.
+ * @return user provided implementation of {@link ReferenceManager}
+ */
+ public ReferenceManager getReferenceManager() {
+ return referenceManager;
+ }
+
+ public long capacity() {
+ return length;
+ }
+
+ /**
+ * Adjusts the capacity of this buffer. Size increases are NOT supported.
+ *
+ * @param newCapacity Must be in in the range [0, length).
+ */
+ public synchronized ArrowBuf capacity(long newCapacity) {
+
+ if (newCapacity == length) {
+ return this;
+ }
+
+ Preconditions.checkArgument(newCapacity >= 0);
+
+ if (newCapacity < length) {
+ length = newCapacity;
+ return this;
+ }
+
+ throw new UnsupportedOperationException("Buffers don't support resizing that increases the size.");
+ }
+
+ /**
+ * Returns the byte order of elements in this buffer.
+ */
+ public ByteOrder order() {
+ return ByteOrder.nativeOrder();
+ }
+
+ /**
+ * Returns the number of bytes still available to read in this buffer.
+ */
+ public long readableBytes() {
+ Preconditions.checkState(writerIndex >= readerIndex,
+ "Writer index cannot be less than reader index");
+ return writerIndex - readerIndex;
+ }
+
+ /**
+ * Returns the number of bytes still available to write into this buffer before capacity is reached.
+ */
+ public long writableBytes() {
+ return capacity() - writerIndex;
+ }
+
+ /**
+ * Returns a slice of only the readable bytes in the buffer.
+ */
+ public ArrowBuf slice() {
+ return slice(readerIndex, readableBytes());
+ }
+
+ /**
+ * Returns a slice (view) starting at <code>index</code> with the given <code>length</code>.
+ */
+ public ArrowBuf slice(long index, long length) {
+
+ Preconditions.checkPositionIndex(index, this.length);
+ Preconditions.checkPositionIndex(index + length, this.length);
+
+ /*
+ * Re the behavior of reference counting, see http://netty.io/wiki/reference-counted-objects
+ * .html#wiki-h3-5, which
+ * explains that derived buffers share their reference count with their parent
+ */
+ final ArrowBuf newBuf = referenceManager.deriveBuffer(this, index, length);
+ newBuf.writerIndex(length);
+ return newBuf;
+ }
+
+ /**
+ * Make a nio byte buffer from this arrowbuf.
+ */
+ public ByteBuffer nioBuffer() {
+ return nioBuffer(readerIndex, checkedCastToInt(readableBytes()));
+ }
+
+
+ /**
+ * Make a nio byte buffer from this ArrowBuf.
+ */
+ public ByteBuffer nioBuffer(long index, int length) {
+ chk(index, length);
+ return getDirectBuffer(index, length);
+ }
+
+ private ByteBuffer getDirectBuffer(long index, int length) {
+ long address = addr(index);
+ return MemoryUtil.directBuffer(address, length);
+ }
+
+ public long memoryAddress() {
+ return this.addr;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("ArrowBuf[%d], address:%d, length:%d", id, memoryAddress(), length);
+ }
+
+ @Override
+ public int hashCode() {
+ return System.identityHashCode(this);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ // identity equals only.
+ return this == obj;
+ }
+
+ /*
+ * IMPORTANT NOTE
+ * The data getters and setters work with a caller provided
+ * index. This index is 0 based and since ArrowBuf has access
+ * to a portion of underlying chunk of memory starting at
+ * some address, we convert the given relative index into
+ * absolute index as memory address + index.
+ *
+ * Example:
+ *
+ * Let's say we have an underlying chunk of memory of length 64 bytes
+ * Now let's say we have an ArrowBuf that has access to the chunk
+ * from offset 4 for length of 16 bytes.
+ *
+ * If the starting virtual address of chunk is MAR, then memory
+ * address of this ArrowBuf is MAR + offset -- this is what is stored
+ * in variable addr. See the BufferLedger and AllocationManager code
+ * for the implementation of ReferenceManager that manages a
+ * chunk of memory and creates ArrowBuf with access to a range of
+ * bytes within the chunk (or the entire chunk)
+ *
+ * So now to get/set data, we will do => addr + index
+ * This logic is put in method addr(index) and is frequently
+ * used in get/set data methods to compute the absolute
+ * byte address for get/set operation in the underlying chunk
+ *
+ * @param index the index at which we the user wants to read/write
+ * @return the absolute address within the memory
+ */
+ private long addr(long index) {
+ return addr + index;
+ }
+
+
+
+ /*-------------------------------------------------*
+ | Following are a set of fast path data set and |
+ | get APIs to write/read data from ArrowBuf |
+ | at a given index (0 based relative to this |
+ | ArrowBuf and not relative to the underlying |
+ | memory chunk). |
+ | |
+ *-------------------------------------------------*/
+
+
+
+ /**
+ * Helper function to do bounds checking at a particular
+ * index for particular length of data.
+ * @param index index (0 based relative to this ArrowBuf)
+ * @param length provided length of data for get/set
+ */
+ private void chk(long index, long length) {
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ checkIndexD(index, length);
+ }
+ }
+
+ private void checkIndexD(long index, long fieldLength) {
+ // check reference count
+ ensureAccessible();
+ // check bounds
+ Preconditions.checkArgument(fieldLength >= 0, "expecting non-negative data length");
+ if (index < 0 || index > capacity() - fieldLength) {
+ if (BaseAllocator.DEBUG) {
+ historicalLog.logHistory(logger);
+ }
+ throw new IndexOutOfBoundsException(String.format(
+ "index: %d, length: %d (expected: range(0, %d))", index, fieldLength, capacity()));
+ }
+ }
+
+ /**
+ * Get long value stored at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be read from
+ * @return 8 byte long value
+ */
+ public long getLong(long index) {
+ chk(index, LONG_SIZE);
+ return MemoryUtil.UNSAFE.getLong(addr(index));
+ }
+
+ /**
+ * Set long value at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be written
+ * @param value value to write
+ */
+ public void setLong(long index, long value) {
+ chk(index, LONG_SIZE);
+ MemoryUtil.UNSAFE.putLong(addr(index), value);
+ }
+
+ /**
+ * Get float value stored at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be read from
+ * @return 4 byte float value
+ */
+ public float getFloat(long index) {
+ return Float.intBitsToFloat(getInt(index));
+ }
+
+ /**
+ * Set float value at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be written
+ * @param value value to write
+ */
+ public void setFloat(long index, float value) {
+ chk(index, FLOAT_SIZE);
+ MemoryUtil.UNSAFE.putInt(addr(index), Float.floatToRawIntBits(value));
+ }
+
+ /**
+ * Get double value stored at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be read from
+ * @return 8 byte double value
+ */
+ public double getDouble(long index) {
+ return Double.longBitsToDouble(getLong(index));
+ }
+
+ /**
+ * Set double value at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be written
+ * @param value value to write
+ */
+ public void setDouble(long index, double value) {
+ chk(index, DOUBLE_SIZE);
+ MemoryUtil.UNSAFE.putLong(addr(index), Double.doubleToRawLongBits(value));
+ }
+
+ /**
+ * Get char value stored at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be read from
+ * @return 2 byte char value
+ */
+ public char getChar(long index) {
+ return (char) getShort(index);
+ }
+
+ /**
+ * Set char value at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be written
+ * @param value value to write
+ */
+ public void setChar(long index, int value) {
+ chk(index, SHORT_SIZE);
+ MemoryUtil.UNSAFE.putShort(addr(index), (short) value);
+ }
+
+ /**
+ * Get int value stored at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be read from
+ * @return 4 byte int value
+ */
+ public int getInt(long index) {
+ chk(index, INT_SIZE);
+ return MemoryUtil.UNSAFE.getInt(addr(index));
+ }
+
+ /**
+ * Set int value at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be written
+ * @param value value to write
+ */
+ public void setInt(long index, int value) {
+ chk(index, INT_SIZE);
+ MemoryUtil.UNSAFE.putInt(addr(index), value);
+ }
+
+ /**
+ * Get short value stored at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be read from
+ * @return 2 byte short value
+ */
+ public short getShort(long index) {
+ chk(index, SHORT_SIZE);
+ return MemoryUtil.UNSAFE.getShort(addr(index));
+ }
+
+ /**
+ * Set short value at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be written
+ * @param value value to write
+ */
+ public void setShort(long index, int value) {
+ setShort(index, (short) value);
+ }
+
+ /**
+ * Set short value at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be written
+ * @param value value to write
+ */
+ public void setShort(long index, short value) {
+ chk(index, SHORT_SIZE);
+ MemoryUtil.UNSAFE.putShort(addr(index), value);
+ }
+
+ /**
+ * Set byte value at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be written
+ * @param value value to write
+ */
+ public void setByte(long index, int value) {
+ chk(index, 1);
+ MemoryUtil.UNSAFE.putByte(addr(index), (byte) value);
+ }
+
+ /**
+ * Set byte value at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be written
+ * @param value value to write
+ */
+ public void setByte(long index, byte value) {
+ chk(index, 1);
+ MemoryUtil.UNSAFE.putByte(addr(index), value);
+ }
+
+ /**
+ * Get byte value stored at a particular index in the
+ * underlying memory chunk this ArrowBuf has access to.
+ * @param index index (0 based relative to this ArrowBuf)
+ * where the value will be read from
+ * @return byte value
+ */
+ public byte getByte(long index) {
+ chk(index, 1);
+ return MemoryUtil.UNSAFE.getByte(addr(index));
+ }
+
+
+
+ /*--------------------------------------------------*
+ | Following are another set of data set APIs |
+ | that directly work with writerIndex |
+ | |
+ *--------------------------------------------------*/
+
+
+
+ /**
+ * Helper function to do bound checking w.r.t writerIndex
+ * by checking if we can set "length" bytes of data at the
+ * writerIndex in this ArrowBuf.
+ * @param length provided length of data for set
+ */
+ private void ensureWritable(final int length) {
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ Preconditions.checkArgument(length >= 0, "expecting non-negative length");
+ // check reference count
+ this.ensureAccessible();
+ // check bounds
+ if (length > writableBytes()) {
+ throw new IndexOutOfBoundsException(
+ String.format("writerIndex(%d) + length(%d) exceeds capacity(%d)", writerIndex, length, capacity()));
+ }
+ }
+ }
+
+ /**
+ * Helper function to do bound checking w.r.t readerIndex
+ * by checking if we can read "length" bytes of data at the
+ * readerIndex in this ArrowBuf.
+ * @param length provided length of data for get
+ */
+ private void ensureReadable(final int length) {
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ Preconditions.checkArgument(length >= 0, "expecting non-negative length");
+ // check reference count
+ this.ensureAccessible();
+ // check bounds
+ if (length > readableBytes()) {
+ throw new IndexOutOfBoundsException(
+ String.format("readerIndex(%d) + length(%d) exceeds writerIndex(%d)", readerIndex, length, writerIndex));
+ }
+ }
+ }
+
+ /**
+ * Read the byte at readerIndex.
+ * @return byte value
+ */
+ public byte readByte() {
+ ensureReadable(1);
+ final byte b = getByte(readerIndex);
+ ++readerIndex;
+ return b;
+ }
+
+ /**
+ * Read dst.length bytes at readerIndex into dst byte array
+ * @param dst byte array where the data will be written
+ */
+ public void readBytes(byte[] dst) {
+ Preconditions.checkArgument(dst != null, "expecting valid dst bytearray");
+ ensureReadable(dst.length);
+ getBytes(readerIndex, dst, 0, checkedCastToInt(dst.length));
+ }
+
+ /**
+ * Set the provided byte value at the writerIndex.
+ * @param value value to set
+ */
+ public void writeByte(byte value) {
+ ensureWritable(1);
+ MemoryUtil.UNSAFE.putByte(addr(writerIndex), value);
+ ++writerIndex;
+ }
+
+ /**
+ * Set the lower order byte for the provided value at
+ * the writerIndex.
+ * @param value value to be set
+ */
+ public void writeByte(int value) {
+ ensureWritable(1);
+ MemoryUtil.UNSAFE.putByte(addr(writerIndex), (byte) value);
+ ++writerIndex;
+ }
+
+ /**
+ * Write the bytes from given byte array into this
+ * ArrowBuf starting at writerIndex.
+ * @param src src byte array
+ */
+ public void writeBytes(byte[] src) {
+ Preconditions.checkArgument(src != null, "expecting valid src array");
+ writeBytes(src, 0, src.length);
+ }
+
+ /**
+ * Write the bytes from given byte array starting at srcIndex
+ * into this ArrowBuf starting at writerIndex.
+ * @param src src byte array
+ * @param srcIndex index in the byte array where the copy will being from
+ * @param length length of data to copy
+ */
+ public void writeBytes(byte[] src, int srcIndex, int length) {
+ ensureWritable(length);
+ setBytes(writerIndex, src, srcIndex, length);
+ writerIndex += length;
+ }
+
+ /**
+ * Set the provided int value as short at the writerIndex.
+ * @param value value to set
+ */
+ public void writeShort(int value) {
+ ensureWritable(SHORT_SIZE);
+ MemoryUtil.UNSAFE.putShort(addr(writerIndex), (short) value);
+ writerIndex += SHORT_SIZE;
+ }
+
+ /**
+ * Set the provided int value at the writerIndex.
+ * @param value value to set
+ */
+ public void writeInt(int value) {
+ ensureWritable(INT_SIZE);
+ MemoryUtil.UNSAFE.putInt(addr(writerIndex), value);
+ writerIndex += INT_SIZE;
+ }
+
+ /**
+ * Set the provided long value at the writerIndex.
+ * @param value value to set
+ */
+ public void writeLong(long value) {
+ ensureWritable(LONG_SIZE);
+ MemoryUtil.UNSAFE.putLong(addr(writerIndex), value);
+ writerIndex += LONG_SIZE;
+ }
+
+ /**
+ * Set the provided float value at the writerIndex.
+ * @param value value to set
+ */
+ public void writeFloat(float value) {
+ ensureWritable(FLOAT_SIZE);
+ MemoryUtil.UNSAFE.putInt(addr(writerIndex), Float.floatToRawIntBits(value));
+ writerIndex += FLOAT_SIZE;
+ }
+
+ /**
+ * Set the provided double value at the writerIndex.
+ * @param value value to set
+ */
+ public void writeDouble(double value) {
+ ensureWritable(DOUBLE_SIZE);
+ MemoryUtil.UNSAFE.putLong(addr(writerIndex), Double.doubleToRawLongBits(value));
+ writerIndex += DOUBLE_SIZE;
+ }
+
+
+ /*--------------------------------------------------*
+ | Following are another set of data set/get APIs |
+ | that read and write stream of bytes from/to byte |
+ | arrays, ByteBuffer, ArrowBuf etc |
+ | |
+ *--------------------------------------------------*/
+
+ /**
+ * Determine if the requested {@code index} and {@code length} will fit within {@code capacity}.
+ * @param index The starting index.
+ * @param length The length which will be utilized (starting from {@code index}).
+ * @param capacity The capacity that {@code index + length} is allowed to be within.
+ * @return {@code true} if the requested {@code index} and {@code length} will fit within {@code capacity}.
+ * {@code false} if this would result in an index out of bounds exception.
+ */
+ private static boolean isOutOfBounds(long index, long length, long capacity) {
+ return (index | length | (index + length) | (capacity - (index + length))) < 0;
+ }
+
+ private void checkIndex(long index, long fieldLength) {
+ // check reference count
+ this.ensureAccessible();
+ // check bounds
+ if (isOutOfBounds(index, fieldLength, this.capacity())) {
+ throw new IndexOutOfBoundsException(String.format("index: %d, length: %d (expected: range(0, %d))",
+ index, fieldLength, this.capacity()));
+ }
+ }
+
+ /**
+ * Copy data from this ArrowBuf at a given index in into destination
+ * byte array.
+ * @param index starting index (0 based relative to the portion of memory)
+ * this ArrowBuf has access to
+ * @param dst byte array to copy the data into
+ */
+ public void getBytes(long index, byte[] dst) {
+ getBytes(index, dst, 0, dst.length);
+ }
+
+ /**
+ * Copy data from this ArrowBuf at a given index into destination byte array.
+ * @param index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param dst byte array to copy the data into
+ * @param dstIndex starting index in dst byte array to copy into
+ * @param length length of data to copy from this ArrowBuf
+ */
+ public void getBytes(long index, byte[] dst, int dstIndex, int length) {
+ // bound check for this ArrowBuf where the data will be copied from
+ checkIndex(index, length);
+ // null check
+ Preconditions.checkArgument(dst != null, "expecting a valid dst byte array");
+ // bound check for dst byte array where the data will be copied to
+ if (isOutOfBounds(dstIndex, length, dst.length)) {
+ // not enough space to copy "length" bytes into dst array from dstIndex onwards
+ throw new IndexOutOfBoundsException("Not enough space to copy data into destination" + dstIndex);
+ }
+ if (length != 0) {
+ // copy "length" bytes from this ArrowBuf starting at addr(index) address
+ // into dst byte array at dstIndex onwards
+ MemoryUtil.UNSAFE.copyMemory(null, addr(index), dst, MemoryUtil.BYTE_ARRAY_BASE_OFFSET + dstIndex, length);
+ }
+ }
+
+ /**
+ * Copy data from a given byte array into this ArrowBuf starting at
+ * a given index.
+ * @param index starting index (0 based relative to the portion of memory)
+ * this ArrowBuf has access to
+ * @param src byte array to copy the data from
+ */
+ public void setBytes(long index, byte[] src) {
+ setBytes(index, src, 0, src.length);
+ }
+
+ /**
+ * Copy data from a given byte array starting at the given source index into
+ * this ArrowBuf at a given index.
+ * @param index index (0 based relative to the portion of memory this ArrowBuf
+ * has access to)
+ * @param src src byte array to copy the data from
+ * @param srcIndex index in the byte array where the copy will start from
+ * @param length length of data to copy from byte array
+ */
+ public void setBytes(long index, byte[] src, int srcIndex, long length) {
+ // bound check for this ArrowBuf where the data will be copied into
+ checkIndex(index, length);
+ // null check
+ Preconditions.checkArgument(src != null, "expecting a valid src byte array");
+ // bound check for src byte array where the data will be copied from
+ if (isOutOfBounds(srcIndex, length, src.length)) {
+ // not enough space to copy "length" bytes into dst array from dstIndex onwards
+ throw new IndexOutOfBoundsException("Not enough space to copy data from byte array" + srcIndex);
+ }
+ if (length > 0) {
+ // copy "length" bytes from src byte array at the starting index (srcIndex)
+ // into this ArrowBuf starting at address "addr(index)"
+ MemoryUtil.UNSAFE.copyMemory(src, MemoryUtil.BYTE_ARRAY_BASE_OFFSET + srcIndex, null, addr(index), length);
+ }
+ }
+
+ /**
+ * Copy data from this ArrowBuf at a given index into the destination
+ * ByteBuffer.
+ * @param index index (0 based relative to the portion of memory this ArrowBuf
+ * has access to)
+ * @param dst dst ByteBuffer where the data will be copied into
+ */
+ public void getBytes(long index, ByteBuffer dst) {
+ // bound check for this ArrowBuf where the data will be copied from
+ checkIndex(index, dst.remaining());
+ // dst.remaining() bytes of data will be copied into dst ByteBuffer
+ if (dst.remaining() != 0) {
+ // address in this ArrowBuf where the copy will begin from
+ final long srcAddress = addr(index);
+ if (dst.isDirect()) {
+ if (dst.isReadOnly()) {
+ throw new ReadOnlyBufferException();
+ }
+ // copy dst.remaining() bytes of data from this ArrowBuf starting
+ // at address srcAddress into the dst ByteBuffer starting at
+ // address dstAddress
+ final long dstAddress = MemoryUtil.getByteBufferAddress(dst) + dst.position();
+ MemoryUtil.UNSAFE.copyMemory(null, srcAddress, null, dstAddress, dst.remaining());
+ // after copy, bump the next write position for the dst ByteBuffer
+ dst.position(dst.position() + dst.remaining());
+ } else if (dst.hasArray()) {
+ // copy dst.remaining() bytes of data from this ArrowBuf starting
+ // at address srcAddress into the dst ByteBuffer starting at
+ // index dstIndex
+ final int dstIndex = dst.arrayOffset() + dst.position();
+ MemoryUtil.UNSAFE.copyMemory(
+ null, srcAddress, dst.array(), MemoryUtil.BYTE_ARRAY_BASE_OFFSET + dstIndex, dst.remaining());
+ // after copy, bump the next write position for the dst ByteBuffer
+ dst.position(dst.position() + dst.remaining());
+ } else {
+ throw new UnsupportedOperationException("Copy from this ArrowBuf to ByteBuffer is not supported");
+ }
+ }
+ }
+
+ /**
+ * Copy data into this ArrowBuf at a given index onwards from
+ * a source ByteBuffer.
+ * @param index index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param src src ByteBuffer where the data will be copied from
+ */
+ public void setBytes(long index, ByteBuffer src) {
+ // bound check for this ArrowBuf where the data will be copied into
+ checkIndex(index, src.remaining());
+ // length of data to copy
+ int length = src.remaining();
+ // address in this ArrowBuf where the data will be copied to
+ long dstAddress = addr(index);
+ if (length != 0) {
+ if (src.isDirect()) {
+ // copy src.remaining() bytes of data from src ByteBuffer starting at
+ // address srcAddress into this ArrowBuf starting at address dstAddress
+ final long srcAddress = MemoryUtil.getByteBufferAddress(src) + src.position();
+ MemoryUtil.UNSAFE.copyMemory(null, srcAddress, null, dstAddress, length);
+ // after copy, bump the next read position for the src ByteBuffer
+ src.position(src.position() + length);
+ } else if (src.hasArray()) {
+ // copy src.remaining() bytes of data from src ByteBuffer starting at
+ // index srcIndex into this ArrowBuf starting at address dstAddress
+ final int srcIndex = src.arrayOffset() + src.position();
+ MemoryUtil.UNSAFE.copyMemory(
+ src.array(), MemoryUtil.BYTE_ARRAY_BASE_OFFSET + srcIndex, null, dstAddress, length);
+ // after copy, bump the next read position for the src ByteBuffer
+ src.position(src.position() + length);
+ } else {
+ final ByteOrder originalByteOrder = src.order();
+ src.order(order());
+ try {
+ // copy word at a time
+ while (length - 128 >= LONG_SIZE) {
+ for (int x = 0; x < 16; x++) {
+ MemoryUtil.UNSAFE.putLong(dstAddress, src.getLong());
+ length -= LONG_SIZE;
+ dstAddress += LONG_SIZE;
+ }
+ }
+ while (length >= LONG_SIZE) {
+ MemoryUtil.UNSAFE.putLong(dstAddress, src.getLong());
+ length -= LONG_SIZE;
+ dstAddress += LONG_SIZE;
+ }
+ // copy last byte
+ while (length > 0) {
+ MemoryUtil.UNSAFE.putByte(dstAddress, src.get());
+ --length;
+ ++dstAddress;
+ }
+ } finally {
+ src.order(originalByteOrder);
+ }
+ }
+ }
+ }
+
+ /**
+ * Copy data into this ArrowBuf at a given index onwards from
+ * a source ByteBuffer starting at a given srcIndex for a certain
+ * length.
+ * @param index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param src src ByteBuffer where the data will be copied from
+ * @param srcIndex starting index in the src ByteBuffer where the data copy
+ * will start from
+ * @param length length of data to copy from src ByteBuffer
+ */
+ public void setBytes(long index, ByteBuffer src, int srcIndex, int length) {
+ // bound check for this ArrowBuf where the data will be copied into
+ checkIndex(index, length);
+ if (src.isDirect()) {
+ // copy length bytes of data from src ByteBuffer starting at address
+ // srcAddress into this ArrowBuf at address dstAddress
+ final long srcAddress = MemoryUtil.getByteBufferAddress(src) + srcIndex;
+ final long dstAddress = addr(index);
+ MemoryUtil.UNSAFE.copyMemory(null, srcAddress, null, dstAddress, length);
+ } else {
+ if (srcIndex == 0 && src.capacity() == length) {
+ // copy the entire ByteBuffer from start to end of length
+ setBytes(index, src);
+ } else {
+ ByteBuffer newBuf = src.duplicate();
+ newBuf.position(srcIndex);
+ newBuf.limit(srcIndex + length);
+ setBytes(index, newBuf);
+ }
+ }
+ }
+
+ /**
+ * Copy a given length of data from this ArrowBuf starting at a given index
+ * into a dst ArrowBuf at dstIndex.
+ * @param index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param dst dst ArrowBuf where the data will be copied into
+ * @param dstIndex index (0 based relative to the portion of memory
+ * dst ArrowBuf has access to)
+ * @param length length of data to copy
+ */
+ public void getBytes(long index, ArrowBuf dst, long dstIndex, int length) {
+ // bound check for this ArrowBuf where the data will be copied from
+ checkIndex(index, length);
+ // bound check for this ArrowBuf where the data will be copied into
+ Preconditions.checkArgument(dst != null, "expecting a valid ArrowBuf");
+ // bound check for dst ArrowBuf
+ if (isOutOfBounds(dstIndex, length, dst.capacity())) {
+ throw new IndexOutOfBoundsException(String.format("index: %d, length: %d (expected: range(0, %d))",
+ dstIndex, length, dst.capacity()));
+ }
+ if (length != 0) {
+ // copy length bytes of data from this ArrowBuf starting at
+ // address srcAddress into dst ArrowBuf starting at address
+ // dstAddress
+ final long srcAddress = addr(index);
+ final long dstAddress = dst.memoryAddress() + (long) dstIndex;
+ MemoryUtil.UNSAFE.copyMemory(null, srcAddress, null, dstAddress, length);
+ }
+ }
+
+ /**
+ * Copy data from src ArrowBuf starting at index srcIndex into this
+ * ArrowBuf at given index.
+ * @param index index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param src src ArrowBuf where the data will be copied from
+ * @param srcIndex starting index in the src ArrowBuf where the copy
+ * will begin from
+ * @param length length of data to copy from src ArrowBuf
+ */
+ public void setBytes(long index, ArrowBuf src, long srcIndex, long length) {
+ // bound check for this ArrowBuf where the data will be copied into
+ checkIndex(index, length);
+ // null check
+ Preconditions.checkArgument(src != null, "expecting a valid ArrowBuf");
+ // bound check for src ArrowBuf
+ if (isOutOfBounds(srcIndex, length, src.capacity())) {
+ throw new IndexOutOfBoundsException(String.format("index: %d, length: %d (expected: range(0, %d))",
+ index, length, src.capacity()));
+ }
+ if (length != 0) {
+ // copy length bytes of data from src ArrowBuf starting at
+ // address srcAddress into this ArrowBuf starting at address
+ // dstAddress
+ final long srcAddress = src.memoryAddress() + srcIndex;
+ final long dstAddress = addr(index);
+ MemoryUtil.UNSAFE.copyMemory(null, srcAddress, null, dstAddress, length);
+ }
+ }
+
+ /**
+ * Copy readableBytes() number of bytes from src ArrowBuf
+ * starting from its readerIndex into this ArrowBuf starting
+ * at the given index.
+ * @param index index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param src src ArrowBuf where the data will be copied from
+ */
+ public void setBytes(long index, ArrowBuf src) {
+ // null check
+ Preconditions.checkArgument(src != null, "expecting valid ArrowBuf");
+ final long length = src.readableBytes();
+ // bound check for this ArrowBuf where the data will be copied into
+ checkIndex(index, length);
+ final long srcAddress = src.memoryAddress() + src.readerIndex;
+ final long dstAddress = addr(index);
+ MemoryUtil.UNSAFE.copyMemory(null, srcAddress, null, dstAddress, length);
+ src.readerIndex(src.readerIndex + length);
+ }
+
+ /**
+ * Copy a certain length of bytes from given InputStream
+ * into this ArrowBuf at the provided index.
+ * @param index index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param in src stream to copy from
+ * @param length length of data to copy
+ * @return number of bytes copied from stream into ArrowBuf
+ * @throws IOException on failing to read from stream
+ */
+ public int setBytes(long index, InputStream in, int length) throws IOException {
+ Preconditions.checkArgument(in != null, "expecting valid input stream");
+ checkIndex(index, length);
+ int readBytes = 0;
+ if (length > 0) {
+ byte[] tmp = new byte[length];
+ // read the data from input stream into tmp byte array
+ readBytes = in.read(tmp);
+ if (readBytes > 0) {
+ // copy readBytes length of data from the tmp byte array starting
+ // at srcIndex 0 into this ArrowBuf starting at address addr(index)
+ MemoryUtil.UNSAFE.copyMemory(tmp, MemoryUtil.BYTE_ARRAY_BASE_OFFSET, null, addr(index), readBytes);
+ }
+ }
+ return readBytes;
+ }
+
+ /**
+ * Copy a certain length of bytes from this ArrowBuf at a given
+ * index into the given OutputStream.
+ * @param index index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param out dst stream to copy data into
+ * @param length length of data to copy
+ * @throws IOException on failing to write to stream
+ */
+ public void getBytes(long index, OutputStream out, int length) throws IOException {
+ Preconditions.checkArgument(out != null, "expecting valid output stream");
+ checkIndex(index, length);
+ if (length > 0) {
+ // copy length bytes of data from this ArrowBuf starting at
+ // address addr(index) into the tmp byte array starting at index 0
+ byte[] tmp = new byte[length];
+ MemoryUtil.UNSAFE.copyMemory(null, addr(index), tmp, MemoryUtil.BYTE_ARRAY_BASE_OFFSET, length);
+ // write the copied data to output stream
+ out.write(tmp);
+ }
+ }
+
+ @Override
+ public void close() {
+ referenceManager.release();
+ }
+
+ /**
+ * Returns the possible memory consumed by this ArrowBuf in the worse case scenario.
+ * (not shared, connected to larger underlying buffer of allocated memory)
+ * @return Size in bytes.
+ */
+ public long getPossibleMemoryConsumed() {
+ return referenceManager.getSize();
+ }
+
+ /**
+ * Return that is Accounted for by this buffer (and its potentially shared siblings within the
+ * context of the associated allocator).
+ * @return Size in bytes.
+ */
+ public long getActualMemoryConsumed() {
+ return referenceManager.getAccountedSize();
+ }
+
+ /**
+ * Return the buffer's byte contents in the form of a hex dump.
+ *
+ * @param start the starting byte index
+ * @param length how many bytes to log
+ * @return A hex dump in a String.
+ */
+ public String toHexString(final long start, final int length) {
+ final long roundedStart = (start / LOG_BYTES_PER_ROW) * LOG_BYTES_PER_ROW;
+
+ final StringBuilder sb = new StringBuilder("buffer byte dump\n");
+ long index = roundedStart;
+ for (long nLogged = 0; nLogged < length; nLogged += LOG_BYTES_PER_ROW) {
+ sb.append(String.format(" [%05d-%05d]", index, index + LOG_BYTES_PER_ROW - 1));
+ for (int i = 0; i < LOG_BYTES_PER_ROW; ++i) {
+ try {
+ final byte b = getByte(index++);
+ sb.append(String.format(" 0x%02x", b));
+ } catch (IndexOutOfBoundsException ioob) {
+ sb.append(" <ioob>");
+ }
+ }
+ sb.append('\n');
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Get the integer id assigned to this ArrowBuf for debugging purposes.
+ * @return integer id
+ */
+ public long getId() {
+ return id;
+ }
+
+ /**
+ * Prints information of this buffer into <code>sb</code> at the given
+ * indentation and verbosity level.
+ *
+ * <p>It will include history if BaseAllocator.DEBUG is true and
+ * the verbosity.includeHistoricalLog are true.
+ *
+ */
+ public void print(StringBuilder sb, int indent, Verbosity verbosity) {
+ CommonUtil.indent(sb, indent).append(toString());
+
+ if (BaseAllocator.DEBUG && verbosity.includeHistoricalLog) {
+ sb.append("\n");
+ historicalLog.buildHistory(sb, indent + 1, verbosity.includeStackTraces);
+ }
+ }
+
+ /**
+ * Get the index at which the next byte will be read from.
+ * @return reader index
+ */
+ public long readerIndex() {
+ return readerIndex;
+ }
+
+ /**
+ * Get the index at which next byte will be written to.
+ * @return writer index
+ */
+ public long writerIndex() {
+ return writerIndex;
+ }
+
+ /**
+ * Set the reader index for this ArrowBuf.
+ * @param readerIndex new reader index
+ * @return this ArrowBuf
+ */
+ public ArrowBuf readerIndex(long readerIndex) {
+ this.readerIndex = readerIndex;
+ return this;
+ }
+
+ /**
+ * Set the writer index for this ArrowBuf.
+ * @param writerIndex new writer index
+ * @return this ArrowBuf
+ */
+ public ArrowBuf writerIndex(long writerIndex) {
+ this.writerIndex = writerIndex;
+ return this;
+ }
+
+ /**
+ * Zero-out the bytes in this ArrowBuf starting at
+ * the given index for the given length.
+ * @param index index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param length length of bytes to zero-out
+ * @return this ArrowBuf
+ */
+ public ArrowBuf setZero(long index, long length) {
+ if (length != 0) {
+ this.checkIndex(index, length);
+ MemoryUtil.UNSAFE.setMemory(this.addr + index, length, (byte) 0);
+ }
+ return this;
+ }
+
+ /**
+ * Sets all bits to one in the specified range.
+ * @param index index index (0 based relative to the portion of memory
+ * this ArrowBuf has access to)
+ * @param length length of bytes to set.
+ * @return this ArrowBuf
+ */
+ public ArrowBuf setOne(int index, int length) {
+ if (length != 0) {
+ this.checkIndex(index, length);
+ MemoryUtil.UNSAFE.setMemory(this.addr + index, length, (byte) 0xff);
+ }
+ return this;
+ }
+
+ /**
+ * Returns <code>this</code> if size is less then {@link #capacity()}, otherwise
+ * delegates to {@link BufferManager#replace(ArrowBuf, long)} to get a new buffer.
+ */
+ public ArrowBuf reallocIfNeeded(final long size) {
+ Preconditions.checkArgument(size >= 0, "reallocation size must be non-negative");
+ if (this.capacity() >= size) {
+ return this;
+ }
+ if (bufferManager != null) {
+ return bufferManager.replace(this, size);
+ } else {
+ throw new UnsupportedOperationException(
+ "Realloc is only available in the context of operator's UDFs");
+ }
+ }
+
+ public ArrowBuf clear() {
+ this.readerIndex = this.writerIndex = 0;
+ return this;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java
new file mode 100644
index 000000000..8d21cef7a
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java
@@ -0,0 +1,951 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
+import org.apache.arrow.memory.rounding.RoundingPolicy;
+import org.apache.arrow.memory.util.AssertionUtil;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.HistoricalLog;
+import org.apache.arrow.util.Preconditions;
+import org.immutables.value.Value;
+
+/**
+ * A base-class that implements all functionality of {@linkplain BufferAllocator}s.
+ *
+ * <p>The class is abstract to enforce usage of {@linkplain RootAllocator}/{@linkplain ChildAllocator}
+ * facades.
+ */
+abstract class BaseAllocator extends Accountant implements BufferAllocator {
+
+ public static final String DEBUG_ALLOCATOR = "arrow.memory.debug.allocator";
+ public static final int DEBUG_LOG_LENGTH = 6;
+ public static final boolean DEBUG;
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BaseAllocator.class);
+
+ // Initialize this before DEFAULT_CONFIG as DEFAULT_CONFIG will eventually initialize the allocation manager,
+ // which in turn allocates an ArrowBuf, which requires DEBUG to have been properly initialized
+ static {
+ // the system property takes precedence.
+ String propValue = System.getProperty(DEBUG_ALLOCATOR);
+ if (propValue != null) {
+ DEBUG = Boolean.parseBoolean(propValue);
+ } else {
+ DEBUG = AssertionUtil.isAssertionsEnabled();
+ }
+ logger.info("Debug mode " + (DEBUG ? "enabled." : "disabled."));
+ }
+
+ public static final Config DEFAULT_CONFIG = ImmutableConfig.builder().build();
+
+ // Package exposed for sharing between AllocatorManger and BaseAllocator objects
+ private final String name;
+ private final RootAllocator root;
+ private final Object DEBUG_LOCK = DEBUG ? new Object() : null;
+ private final AllocationListener listener;
+ private final BaseAllocator parentAllocator;
+ private final Map<BaseAllocator, Object> childAllocators;
+ private final ArrowBuf empty;
+ // members used purely for debugging
+ private final IdentityHashMap<BufferLedger, Object> childLedgers;
+ private final IdentityHashMap<Reservation, Object> reservations;
+ private final HistoricalLog historicalLog;
+ private final RoundingPolicy roundingPolicy;
+ private final AllocationManager.Factory allocationManagerFactory;
+
+ private volatile boolean isClosed = false; // the allocator has been closed
+
+ /**
+ * Initialize an allocator.
+ *
+ * @param parentAllocator parent allocator. null if defining a root allocator
+ * @param name name of this allocator
+ * @param config configuration including other options of this allocator
+ *
+ * @see Config
+ */
+ protected BaseAllocator(
+ final BaseAllocator parentAllocator,
+ final String name,
+ final Config config) throws OutOfMemoryException {
+ super(parentAllocator, name, config.getInitReservation(), config.getMaxAllocation());
+
+ this.listener = config.getListener();
+ this.allocationManagerFactory = config.getAllocationManagerFactory();
+
+ if (parentAllocator != null) {
+ this.root = parentAllocator.root;
+ empty = parentAllocator.empty;
+ } else if (this instanceof RootAllocator) {
+ this.root = (RootAllocator) this;
+ empty = createEmpty();
+ } else {
+ throw new IllegalStateException("An parent allocator must either carry a root or be the " +
+ "root.");
+ }
+
+ this.parentAllocator = parentAllocator;
+ this.name = name;
+
+ this.childAllocators = Collections.synchronizedMap(new IdentityHashMap<>());
+
+ if (DEBUG) {
+ reservations = new IdentityHashMap<>();
+ childLedgers = new IdentityHashMap<>();
+ historicalLog = new HistoricalLog(DEBUG_LOG_LENGTH, "allocator[%s]", name);
+ hist("created by \"%s\", owned = %d", name, this.getAllocatedMemory());
+ } else {
+ reservations = null;
+ historicalLog = null;
+ childLedgers = null;
+ }
+ this.roundingPolicy = config.getRoundingPolicy();
+ }
+
+ @Override
+ public AllocationListener getListener() {
+ return listener;
+ }
+
+ @Override
+ public BaseAllocator getParentAllocator() {
+ return parentAllocator;
+ }
+
+ @Override
+ public Collection<BufferAllocator> getChildAllocators() {
+ synchronized (childAllocators) {
+ return new HashSet<>(childAllocators.keySet());
+ }
+ }
+
+ private static String createErrorMsg(final BufferAllocator allocator, final long rounded, final long requested) {
+ if (rounded != requested) {
+ return String.format(
+ "Unable to allocate buffer of size %d (rounded from %d) due to memory limit. Current " +
+ "allocation: %d", rounded, requested, allocator.getAllocatedMemory());
+ } else {
+ return String.format(
+ "Unable to allocate buffer of size %d due to memory limit. Current " +
+ "allocation: %d", rounded, allocator.getAllocatedMemory());
+ }
+ }
+
+ public static boolean isDebug() {
+ return DEBUG;
+ }
+
+ @Override
+ public void assertOpen() {
+ if (AssertionUtil.ASSERT_ENABLED) {
+ if (isClosed) {
+ throw new IllegalStateException("Attempting operation on allocator when allocator is closed.\n" +
+ toVerboseString());
+ }
+ }
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public ArrowBuf getEmpty() {
+ return empty;
+ }
+
+ /**
+ * For debug/verification purposes only. Allows an AllocationManager to tell the allocator that
+ * we have a new ledger
+ * associated with this allocator.
+ */
+ void associateLedger(BufferLedger ledger) {
+ assertOpen();
+ if (DEBUG) {
+ synchronized (DEBUG_LOCK) {
+ childLedgers.put(ledger, null);
+ }
+ }
+ }
+
+ /**
+ * For debug/verification purposes only. Allows an AllocationManager to tell the allocator that
+ * we are removing a
+ * ledger associated with this allocator
+ */
+ void dissociateLedger(BufferLedger ledger) {
+ assertOpen();
+ if (DEBUG) {
+ synchronized (DEBUG_LOCK) {
+ if (!childLedgers.containsKey(ledger)) {
+ throw new IllegalStateException("Trying to remove a child ledger that doesn't exist.");
+ }
+ childLedgers.remove(ledger);
+ }
+ }
+ }
+
+ /**
+ * Track when a ChildAllocator of this BaseAllocator is closed. Used for debugging purposes.
+ *
+ * @param childAllocator The child allocator that has been closed.
+ */
+ private void childClosed(final BaseAllocator childAllocator) {
+ assertOpen();
+
+ if (DEBUG) {
+ Preconditions.checkArgument(childAllocator != null, "child allocator can't be null");
+
+ synchronized (DEBUG_LOCK) {
+ final Object object = childAllocators.remove(childAllocator);
+ if (object == null) {
+ childAllocator.historicalLog.logHistory(logger);
+ throw new IllegalStateException("Child allocator[" + childAllocator.name +
+ "] not found in parent allocator[" + name + "]'s childAllocators");
+ }
+ }
+ } else {
+ childAllocators.remove(childAllocator);
+ }
+ listener.onChildRemoved(this, childAllocator);
+ }
+
+ @Override
+ public ArrowBuf buffer(final long initialRequestSize) {
+ assertOpen();
+
+ return buffer(initialRequestSize, null);
+ }
+
+ private ArrowBuf createEmpty() {
+ return allocationManagerFactory.empty();
+ }
+
+ @Override
+ public ArrowBuf buffer(final long initialRequestSize, BufferManager manager) {
+ assertOpen();
+
+ Preconditions.checkArgument(initialRequestSize >= 0, "the requested size must be non-negative");
+
+ if (initialRequestSize == 0) {
+ return getEmpty();
+ }
+
+ // round the request size according to the rounding policy
+ final long actualRequestSize = roundingPolicy.getRoundedSize(initialRequestSize);
+
+ listener.onPreAllocation(actualRequestSize);
+
+ AllocationOutcome outcome = this.allocateBytes(actualRequestSize);
+ if (!outcome.isOk()) {
+ if (listener.onFailedAllocation(actualRequestSize, outcome)) {
+ // Second try, in case the listener can do something about it
+ outcome = this.allocateBytes(actualRequestSize);
+ }
+ if (!outcome.isOk()) {
+ throw new OutOfMemoryException(createErrorMsg(this, actualRequestSize,
+ initialRequestSize), outcome.getDetails());
+ }
+ }
+
+ boolean success = false;
+ try {
+ ArrowBuf buffer = bufferWithoutReservation(actualRequestSize, manager);
+ success = true;
+ listener.onAllocation(actualRequestSize);
+ return buffer;
+ } catch (OutOfMemoryError e) {
+ throw e;
+ } finally {
+ if (!success) {
+ releaseBytes(actualRequestSize);
+ }
+ }
+ }
+
+ /**
+ * Used by usual allocation as well as for allocating a pre-reserved buffer.
+ * Skips the typical accounting associated with creating a new buffer.
+ */
+ private ArrowBuf bufferWithoutReservation(
+ final long size,
+ BufferManager bufferManager) throws OutOfMemoryException {
+ assertOpen();
+
+ final AllocationManager manager = newAllocationManager(size);
+ final BufferLedger ledger = manager.associate(this); // +1 ref cnt (required)
+ final ArrowBuf buffer = ledger.newArrowBuf(size, bufferManager);
+
+ // make sure that our allocation is equal to what we expected.
+ Preconditions.checkArgument(buffer.capacity() == size,
+ "Allocated capacity %d was not equal to requested capacity %d.", buffer.capacity(), size);
+
+ return buffer;
+ }
+
+ private AllocationManager newAllocationManager(long size) {
+ return newAllocationManager(this, size);
+ }
+
+
+ private AllocationManager newAllocationManager(BaseAllocator accountingAllocator, long size) {
+ return allocationManagerFactory.create(accountingAllocator, size);
+ }
+
+ @Override
+ public BufferAllocator getRoot() {
+ return root;
+ }
+
+ @Override
+ public BufferAllocator newChildAllocator(
+ final String name,
+ final long initReservation,
+ final long maxAllocation) {
+ return newChildAllocator(name, this.listener, initReservation, maxAllocation);
+ }
+
+ @Override
+ public BufferAllocator newChildAllocator(
+ final String name,
+ final AllocationListener listener,
+ final long initReservation,
+ final long maxAllocation) {
+ assertOpen();
+
+ final ChildAllocator childAllocator =
+ new ChildAllocator(this, name, configBuilder()
+ .listener(listener)
+ .initReservation(initReservation)
+ .maxAllocation(maxAllocation)
+ .roundingPolicy(roundingPolicy)
+ .allocationManagerFactory(allocationManagerFactory)
+ .build());
+
+ if (DEBUG) {
+ synchronized (DEBUG_LOCK) {
+ childAllocators.put(childAllocator, childAllocator);
+ historicalLog.recordEvent("allocator[%s] created new child allocator[%s]", name,
+ childAllocator.getName());
+ }
+ } else {
+ childAllocators.put(childAllocator, childAllocator);
+ }
+ this.listener.onChildAdded(this, childAllocator);
+
+ return childAllocator;
+ }
+
+ @Override
+ public AllocationReservation newReservation() {
+ assertOpen();
+
+ return new Reservation();
+ }
+
+ @Override
+ public synchronized void close() {
+ /*
+ * Some owners may close more than once because of complex cleanup and shutdown
+ * procedures.
+ */
+ if (isClosed) {
+ return;
+ }
+
+ isClosed = true;
+
+ StringBuilder outstandingChildAllocators = new StringBuilder();
+ if (DEBUG) {
+ synchronized (DEBUG_LOCK) {
+ verifyAllocator();
+
+ // are there outstanding child allocators?
+ if (!childAllocators.isEmpty()) {
+ for (final BaseAllocator childAllocator : childAllocators.keySet()) {
+ if (childAllocator.isClosed) {
+ logger.warn(String.format(
+ "Closed child allocator[%s] on parent allocator[%s]'s child list.\n%s",
+ childAllocator.name, name, toString()));
+ }
+ }
+
+ throw new IllegalStateException(
+ String.format("Allocator[%s] closed with outstanding child allocators.\n%s", name,
+ toString()));
+ }
+
+ // are there outstanding buffers?
+ final int allocatedCount = childLedgers.size();
+ if (allocatedCount > 0) {
+ throw new IllegalStateException(
+ String.format("Allocator[%s] closed with outstanding buffers allocated (%d).\n%s",
+ name, allocatedCount, toString()));
+ }
+
+ if (reservations.size() != 0) {
+ throw new IllegalStateException(
+ String.format("Allocator[%s] closed with outstanding reservations (%d).\n%s", name,
+ reservations.size(),
+ toString()));
+ }
+
+ }
+ } else {
+ if (!childAllocators.isEmpty()) {
+ outstandingChildAllocators.append("Outstanding child allocators : \n");
+ synchronized (childAllocators) {
+ for (final BaseAllocator childAllocator : childAllocators.keySet()) {
+ outstandingChildAllocators.append(String.format(" %s", childAllocator.toString()));
+ }
+ }
+ }
+ }
+
+ // Is there unaccounted-for outstanding allocation?
+ final long allocated = getAllocatedMemory();
+ if (allocated > 0) {
+ if (parent != null && reservation > allocated) {
+ parent.releaseBytes(reservation - allocated);
+ }
+ String msg = String.format("Memory was leaked by query. Memory leaked: (%d)\n%s%s", allocated,
+ outstandingChildAllocators.toString(), toString());
+ logger.error(msg);
+ throw new IllegalStateException(msg);
+ }
+
+ // we need to release our memory to our parent before we tell it we've closed.
+ super.close();
+
+ // Inform our parent allocator that we've closed
+ if (parentAllocator != null) {
+ parentAllocator.childClosed(this);
+ }
+
+ if (DEBUG) {
+ historicalLog.recordEvent("closed");
+ logger.debug(String.format("closed allocator[%s].", name));
+ }
+
+
+ }
+
+ @Override
+ public String toString() {
+ final Verbosity verbosity = logger.isTraceEnabled() ? Verbosity.LOG_WITH_STACKTRACE
+ : Verbosity.BASIC;
+ final StringBuilder sb = new StringBuilder();
+ print(sb, 0, verbosity);
+ return sb.toString();
+ }
+
+ /**
+ * Provide a verbose string of the current allocator state. Includes the state of all child
+ * allocators, along with
+ * historical logs of each object and including stacktraces.
+ *
+ * @return A Verbose string of current allocator state.
+ */
+ @Override
+ public String toVerboseString() {
+ final StringBuilder sb = new StringBuilder();
+ print(sb, 0, Verbosity.LOG_WITH_STACKTRACE);
+ return sb.toString();
+ }
+
+ private void hist(String noteFormat, Object... args) {
+ historicalLog.recordEvent(noteFormat, args);
+ }
+
+ /**
+ * Verifies the accounting state of the allocator. Only works for DEBUG.
+ *
+ * @throws IllegalStateException when any problems are found
+ */
+ void verifyAllocator() {
+ final IdentityHashMap<AllocationManager, BaseAllocator> seen = new IdentityHashMap<>();
+ verifyAllocator(seen);
+ }
+
+ /**
+ * Verifies the accounting state of the allocator (Only works for DEBUG)
+ * This overload is used for recursive calls, allowing for checking
+ * that ArrowBufs are unique across all allocators that are checked.
+ *
+ * @param buffersSeen a map of buffers that have already been seen when walking a tree of
+ * allocators
+ * @throws IllegalStateException when any problems are found
+ */
+ private void verifyAllocator(
+ final IdentityHashMap<AllocationManager, BaseAllocator> buffersSeen) {
+ // The remaining tests can only be performed if we're in debug mode.
+ if (!DEBUG) {
+ return;
+ }
+
+ synchronized (DEBUG_LOCK) {
+ final long allocated = getAllocatedMemory();
+
+ // verify my direct descendants
+ final Set<BaseAllocator> childSet = childAllocators.keySet();
+ for (final BaseAllocator childAllocator : childSet) {
+ childAllocator.verifyAllocator(buffersSeen);
+ }
+
+ /*
+ * Verify my relationships with my descendants.
+ *
+ * The sum of direct child allocators' owned memory must be <= my allocated memory; my
+ * allocated memory also
+ * includes ArrowBuf's directly allocated by me.
+ */
+ long childTotal = 0;
+ for (final BaseAllocator childAllocator : childSet) {
+ childTotal += Math.max(childAllocator.getAllocatedMemory(), childAllocator.reservation);
+ }
+ if (childTotal > getAllocatedMemory()) {
+ historicalLog.logHistory(logger);
+ logger.debug("allocator[" + name + "] child event logs BEGIN");
+ for (final BaseAllocator childAllocator : childSet) {
+ childAllocator.historicalLog.logHistory(logger);
+ }
+ logger.debug("allocator[" + name + "] child event logs END");
+ throw new IllegalStateException(
+ "Child allocators own more memory (" + childTotal + ") than their parent (name = " +
+ name + " ) has allocated (" + getAllocatedMemory() + ')');
+ }
+
+ // Furthermore, the amount I've allocated should be that plus buffers I've allocated.
+ long bufferTotal = 0;
+
+ final Set<BufferLedger> ledgerSet = childLedgers.keySet();
+ for (final BufferLedger ledger : ledgerSet) {
+ if (!ledger.isOwningLedger()) {
+ continue;
+ }
+
+ final AllocationManager am = ledger.getAllocationManager();
+ /*
+ * Even when shared, ArrowBufs are rewrapped, so we should never see the same instance
+ * twice.
+ */
+ final BaseAllocator otherOwner = buffersSeen.get(am);
+ if (otherOwner != null) {
+ throw new IllegalStateException("This allocator's ArrowBuf already owned by another " +
+ "allocator");
+ }
+ buffersSeen.put(am, this);
+
+ bufferTotal += am.getSize();
+ }
+
+ // Preallocated space has to be accounted for
+ final Set<Reservation> reservationSet = reservations.keySet();
+ long reservedTotal = 0;
+ for (final Reservation reservation : reservationSet) {
+ if (!reservation.isUsed()) {
+ reservedTotal += reservation.getSize();
+ }
+ }
+
+ if (bufferTotal + reservedTotal + childTotal != getAllocatedMemory()) {
+ final StringBuilder sb = new StringBuilder();
+ sb.append("allocator[");
+ sb.append(name);
+ sb.append("]\nallocated: ");
+ sb.append(Long.toString(allocated));
+ sb.append(" allocated - (bufferTotal + reservedTotal + childTotal): ");
+ sb.append(Long.toString(allocated - (bufferTotal + reservedTotal + childTotal)));
+ sb.append('\n');
+
+ if (bufferTotal != 0) {
+ sb.append("buffer total: ");
+ sb.append(Long.toString(bufferTotal));
+ sb.append('\n');
+ dumpBuffers(sb, ledgerSet);
+ }
+
+ if (childTotal != 0) {
+ sb.append("child total: ");
+ sb.append(Long.toString(childTotal));
+ sb.append('\n');
+
+ for (final BaseAllocator childAllocator : childSet) {
+ sb.append("child allocator[");
+ sb.append(childAllocator.name);
+ sb.append("] owned ");
+ sb.append(Long.toString(childAllocator.getAllocatedMemory()));
+ sb.append('\n');
+ }
+ }
+
+ if (reservedTotal != 0) {
+ sb.append(String.format("reserved total : %d bytes.", reservedTotal));
+ for (final Reservation reservation : reservationSet) {
+ reservation.historicalLog.buildHistory(sb, 0, true);
+ sb.append('\n');
+ }
+ }
+
+ logger.debug(sb.toString());
+
+ final long allocated2 = getAllocatedMemory();
+
+ if (allocated2 != allocated) {
+ throw new IllegalStateException(String.format(
+ "allocator[%s]: allocated t1 (%d) + allocated t2 (%d). Someone released memory while in verification.",
+ name, allocated, allocated2));
+
+ }
+ throw new IllegalStateException(String.format(
+ "allocator[%s]: buffer space (%d) + prealloc space (%d) + child space (%d) != allocated (%d)",
+ name, bufferTotal, reservedTotal, childTotal, allocated));
+ }
+ }
+ }
+
+ void print(StringBuilder sb, int level, Verbosity verbosity) {
+
+ CommonUtil.indent(sb, level)
+ .append("Allocator(")
+ .append(name)
+ .append(") ")
+ .append(reservation)
+ .append('/')
+ .append(getAllocatedMemory())
+ .append('/')
+ .append(getPeakMemoryAllocation())
+ .append('/')
+ .append(getLimit())
+ .append(" (res/actual/peak/limit)")
+ .append('\n');
+
+ if (DEBUG) {
+ CommonUtil.indent(sb, level + 1).append(String.format("child allocators: %d\n", childAllocators.size()));
+ for (BaseAllocator child : childAllocators.keySet()) {
+ child.print(sb, level + 2, verbosity);
+ }
+
+ CommonUtil.indent(sb, level + 1).append(String.format("ledgers: %d\n", childLedgers.size()));
+ for (BufferLedger ledger : childLedgers.keySet()) {
+ ledger.print(sb, level + 2, verbosity);
+ }
+
+ final Set<Reservation> reservations = this.reservations.keySet();
+ CommonUtil.indent(sb, level + 1).append(String.format("reservations: %d\n", reservations.size()));
+ for (final Reservation reservation : reservations) {
+ if (verbosity.includeHistoricalLog) {
+ reservation.historicalLog.buildHistory(sb, level + 3, true);
+ }
+ }
+
+ }
+
+ }
+
+ private void dumpBuffers(final StringBuilder sb, final Set<BufferLedger> ledgerSet) {
+ for (final BufferLedger ledger : ledgerSet) {
+ if (!ledger.isOwningLedger()) {
+ continue;
+ }
+ final AllocationManager am = ledger.getAllocationManager();
+ sb.append("UnsafeDirectLittleEndian[identityHashCode == ");
+ sb.append(Integer.toString(System.identityHashCode(am)));
+ sb.append("] size ");
+ sb.append(Long.toString(am.getSize()));
+ sb.append('\n');
+ }
+ }
+
+ /**
+ * Enum for logging verbosity.
+ */
+ public enum Verbosity {
+ BASIC(false, false), // only include basic information
+ LOG(true, false), // include basic
+ LOG_WITH_STACKTRACE(true, true) //
+ ;
+
+ public final boolean includeHistoricalLog;
+ public final boolean includeStackTraces;
+
+ Verbosity(boolean includeHistoricalLog, boolean includeStackTraces) {
+ this.includeHistoricalLog = includeHistoricalLog;
+ this.includeStackTraces = includeStackTraces;
+ }
+ }
+
+ /**
+ * Returns a default {@link Config} instance.
+ *
+ * @see ImmutableConfig.Builder
+ */
+ public static Config defaultConfig() {
+ return DEFAULT_CONFIG;
+
+ }
+
+ /**
+ * Returns a builder class for configuring BaseAllocator's options.
+ */
+ public static ImmutableConfig.Builder configBuilder() {
+ return ImmutableConfig.builder();
+ }
+
+ @Override
+ public RoundingPolicy getRoundingPolicy() {
+ return roundingPolicy;
+ }
+
+ /**
+ * Config class of {@link BaseAllocator}.
+ */
+ @Value.Immutable
+ abstract static class Config {
+ /**
+ * Factory for creating {@link AllocationManager} instances.
+ */
+ @Value.Default
+ AllocationManager.Factory getAllocationManagerFactory() {
+ return DefaultAllocationManagerOption.getDefaultAllocationManagerFactory();
+ }
+
+ /**
+ * Listener callback. Must be non-null.
+ */
+ @Value.Default
+ AllocationListener getListener() {
+ return AllocationListener.NOOP;
+ }
+
+ /**
+ * Initial reservation size (in bytes) for this allocator.
+ */
+ @Value.Default
+ long getInitReservation() {
+ return 0;
+ }
+
+ /**
+ * Max allocation size (in bytes) for this allocator, allocations past this limit fail.
+ * Can be modified after construction.
+ */
+ @Value.Default
+ long getMaxAllocation() {
+ return Long.MAX_VALUE;
+ }
+
+ /**
+ * The policy for rounding the buffer size.
+ */
+ @Value.Default
+ RoundingPolicy getRoundingPolicy() {
+ return DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY;
+ }
+ }
+
+ /**
+ * Implementation of {@link AllocationReservation} that supports
+ * history tracking under {@linkplain #DEBUG} is true.
+ */
+ public class Reservation implements AllocationReservation {
+
+ private final HistoricalLog historicalLog;
+ private int nBytes = 0;
+ private boolean used = false;
+ private boolean closed = false;
+
+ /**
+ * Creates a new reservation.
+ *
+ * <p>If {@linkplain #DEBUG} is true this will capture a historical
+ * log of events relevant to this Reservation.
+ */
+ public Reservation() {
+ if (DEBUG) {
+ historicalLog = new HistoricalLog("Reservation[allocator[%s], %d]", name, System
+ .identityHashCode(this));
+ historicalLog.recordEvent("created");
+ synchronized (DEBUG_LOCK) {
+ reservations.put(this, this);
+ }
+ } else {
+ historicalLog = null;
+ }
+ }
+
+ @Override
+ public boolean add(final int nBytes) {
+ assertOpen();
+
+ Preconditions.checkArgument(nBytes >= 0, "nBytes(%d) < 0", nBytes);
+ Preconditions.checkState(!closed, "Attempt to increase reservation after reservation has been closed");
+ Preconditions.checkState(!used, "Attempt to increase reservation after reservation has been used");
+
+ // we round up to next power of two since all reservations are done in powers of two. This
+ // may overestimate the
+ // preallocation since someone may perceive additions to be power of two. If this becomes a
+ // problem, we can look
+ // at
+ // modifying this behavior so that we maintain what we reserve and what the user asked for
+ // and make sure to only
+ // round to power of two as necessary.
+ final int nBytesTwo = CommonUtil.nextPowerOfTwo(nBytes);
+ if (!reserve(nBytesTwo)) {
+ return false;
+ }
+
+ this.nBytes += nBytesTwo;
+ return true;
+ }
+
+ @Override
+ public ArrowBuf allocateBuffer() {
+ assertOpen();
+
+ Preconditions.checkState(!closed, "Attempt to allocate after closed");
+ Preconditions.checkState(!used, "Attempt to allocate more than once");
+
+ final ArrowBuf arrowBuf = allocate(nBytes);
+ used = true;
+ return arrowBuf;
+ }
+
+ @Override
+ public int getSize() {
+ return nBytes;
+ }
+
+ @Override
+ public boolean isUsed() {
+ return used;
+ }
+
+ @Override
+ public boolean isClosed() {
+ return closed;
+ }
+
+ @Override
+ public void close() {
+ assertOpen();
+
+ if (closed) {
+ return;
+ }
+
+ if (DEBUG) {
+ if (!isClosed()) {
+ final Object object;
+ synchronized (DEBUG_LOCK) {
+ object = reservations.remove(this);
+ }
+ if (object == null) {
+ final StringBuilder sb = new StringBuilder();
+ print(sb, 0, Verbosity.LOG_WITH_STACKTRACE);
+ logger.debug(sb.toString());
+ throw new IllegalStateException(String.format("Didn't find closing reservation[%d]",
+ System.identityHashCode(this)));
+ }
+
+ historicalLog.recordEvent("closed");
+ }
+ }
+
+ if (!used) {
+ releaseReservation(nBytes);
+ }
+
+ closed = true;
+ }
+
+ @Override
+ public boolean reserve(int nBytes) {
+ assertOpen();
+
+ final AllocationOutcome outcome = BaseAllocator.this.allocateBytes(nBytes);
+
+ if (DEBUG) {
+ historicalLog.recordEvent("reserve(%d) => %s", nBytes, Boolean.toString(outcome.isOk()));
+ }
+
+ return outcome.isOk();
+ }
+
+ /**
+ * Allocate a buffer of the requested size.
+ *
+ * <p>The implementation of the allocator's inner class provides this.
+ *
+ * @param nBytes the size of the buffer requested
+ * @return the buffer, or null, if the request cannot be satisfied
+ */
+ private ArrowBuf allocate(int nBytes) {
+ assertOpen();
+
+ boolean success = false;
+
+ /*
+ * The reservation already added the requested bytes to the allocators owned and allocated
+ * bytes via reserve().
+ * This ensures that they can't go away. But when we ask for the buffer here, that will add
+ * to the allocated bytes
+ * as well, so we need to return the same number back to avoid double-counting them.
+ */
+ try {
+ final ArrowBuf arrowBuf = BaseAllocator.this.bufferWithoutReservation(nBytes, null);
+
+ listener.onAllocation(nBytes);
+ if (DEBUG) {
+ historicalLog.recordEvent("allocate() => %s", String.format("ArrowBuf[%d]", arrowBuf
+ .getId()));
+ }
+ success = true;
+ return arrowBuf;
+ } finally {
+ if (!success) {
+ releaseBytes(nBytes);
+ }
+ }
+ }
+
+ /**
+ * Return the reservation back to the allocator without having used it.
+ *
+ * @param nBytes the size of the reservation
+ */
+ private void releaseReservation(int nBytes) {
+ assertOpen();
+
+ releaseBytes(nBytes);
+
+ if (DEBUG) {
+ historicalLog.recordEvent("releaseReservation(%d)", nBytes);
+ }
+ }
+
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java
new file mode 100644
index 000000000..bbf7ff34d
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * Configuration class to determine if bounds checking should be turned on or off.
+ *
+ * <p>
+ * Bounds checking is on by default. You can disable it by setting either the system property or
+ * the environmental variable to "true". The system property can be "arrow.enable_unsafe_memory_access"
+ * or "drill.enable_unsafe_memory_access". The latter is deprecated. The environmental variable is named
+ * "ARROW_ENABLE_UNSAFE_MEMORY_ACCESS".
+ * When both the system property and the environmental variable are set, the system property takes precedence.
+ * </p>
+ */
+public class BoundsChecking {
+
+ public static final boolean BOUNDS_CHECKING_ENABLED;
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BoundsChecking.class);
+
+ static {
+ String envProperty = System.getenv("ARROW_ENABLE_UNSAFE_MEMORY_ACCESS");
+ String oldProperty = System.getProperty("drill.enable_unsafe_memory_access");
+ if (oldProperty != null) {
+ logger.warn("\"drill.enable_unsafe_memory_access\" has been renamed to \"arrow.enable_unsafe_memory_access\"");
+ logger.warn("\"arrow.enable_unsafe_memory_access\" can be set to: " +
+ " true (to not check) or false (to check, default)");
+ }
+ String newProperty = System.getProperty("arrow.enable_unsafe_memory_access");
+
+ // The priority of determining the unsafe flag:
+ // 1. The system properties take precedence over the environmental variable.
+ // 2. The new system property takes precedence over the new system property.
+ String unsafeFlagValue = newProperty;
+ if (unsafeFlagValue == null) {
+ unsafeFlagValue = oldProperty;
+ }
+ if (unsafeFlagValue == null) {
+ unsafeFlagValue = envProperty;
+ }
+
+ BOUNDS_CHECKING_ENABLED = !"true".equals(unsafeFlagValue);
+ }
+
+ private BoundsChecking() {
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java
new file mode 100644
index 000000000..e59349c64
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.Collection;
+
+import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
+import org.apache.arrow.memory.rounding.RoundingPolicy;
+
+/**
+ * Wrapper class to deal with byte buffer allocation. Ensures users only use designated methods.
+ */
+public interface BufferAllocator extends AutoCloseable {
+
+ /**
+ * Allocate a new or reused buffer of the provided size. Note that the buffer may technically
+ * be larger than the
+ * requested size for rounding purposes. However, the buffer's capacity will be set to the
+ * configured size.
+ *
+ * @param size The size in bytes.
+ * @return a new ArrowBuf, or null if the request can't be satisfied
+ * @throws OutOfMemoryException if buffer cannot be allocated
+ */
+ ArrowBuf buffer(long size);
+
+ /**
+ * Allocate a new or reused buffer of the provided size. Note that the buffer may technically
+ * be larger than the
+ * requested size for rounding purposes. However, the buffer's capacity will be set to the
+ * configured size.
+ *
+ * @param size The size in bytes.
+ * @param manager A buffer manager to manage reallocation.
+ * @return a new ArrowBuf, or null if the request can't be satisfied
+ * @throws OutOfMemoryException if buffer cannot be allocated
+ */
+ ArrowBuf buffer(long size, BufferManager manager);
+
+ /**
+ * Get the root allocator of this allocator. If this allocator is already a root, return
+ * this directly.
+ *
+ * @return The root allocator
+ */
+ BufferAllocator getRoot();
+
+ /**
+ * Create a new child allocator.
+ *
+ * @param name the name of the allocator.
+ * @param initReservation the initial space reservation (obtained from this allocator)
+ * @param maxAllocation maximum amount of space the new allocator can allocate
+ * @return the new allocator, or null if it can't be created
+ */
+ BufferAllocator newChildAllocator(String name, long initReservation, long maxAllocation);
+
+ /**
+ * Create a new child allocator.
+ *
+ * @param name the name of the allocator.
+ * @param listener allocation listener for the newly created child
+ * @param initReservation the initial space reservation (obtained from this allocator)
+ * @param maxAllocation maximum amount of space the new allocator can allocate
+ * @return the new allocator, or null if it can't be created
+ */
+ BufferAllocator newChildAllocator(
+ String name,
+ AllocationListener listener,
+ long initReservation,
+ long maxAllocation);
+
+ /**
+ * Close and release all buffers generated from this buffer pool.
+ *
+ * <p>When assertions are on, complains if there are any outstanding buffers; to avoid
+ * that, release all buffers before the allocator is closed.</p>
+ */
+ @Override
+ void close();
+
+ /**
+ * Returns the amount of memory currently allocated from this allocator.
+ *
+ * @return the amount of memory currently allocated
+ */
+ long getAllocatedMemory();
+
+ /**
+ * Return the current maximum limit this allocator imposes.
+ *
+ * @return Limit in number of bytes.
+ */
+ long getLimit();
+
+ /**
+ * Return the initial reservation.
+ *
+ * @return reservation in bytes.
+ */
+ long getInitReservation();
+
+ /**
+ * Set the maximum amount of memory this allocator is allowed to allocate.
+ *
+ * @param newLimit The new Limit to apply to allocations
+ */
+ void setLimit(long newLimit);
+
+ /**
+ * Returns the peak amount of memory allocated from this allocator.
+ *
+ * @return the peak amount of memory allocated
+ */
+ long getPeakMemoryAllocation();
+
+ /**
+ * Returns the amount of memory that can probably be allocated at this moment
+ * without exceeding this or any parents allocation maximum.
+ *
+ * @return Headroom in bytes
+ */
+ long getHeadroom();
+
+ /**
+ * Forcibly allocate bytes. Returns whether the allocation fit within limits.
+ *
+ * @param size to increase
+ * @return Whether the allocation fit within limits.
+ */
+ boolean forceAllocate(long size);
+
+
+ /**
+ * Release bytes from this allocator.
+ *
+ * @param size to release
+ */
+ void releaseBytes(long size);
+
+ /**
+ * Returns the allocation listener used by this allocator.
+ *
+ * @return the {@link AllocationListener} instance. Or {@link AllocationListener#NOOP} by default if no listener
+ * is configured when this allocator was created.
+ */
+ AllocationListener getListener();
+
+ /**
+ * Returns the parent allocator.
+ *
+ * @return parent allocator
+ */
+ BufferAllocator getParentAllocator();
+
+ /**
+ * Returns the set of child allocators.
+ *
+ * @return set of child allocators
+ */
+ Collection<BufferAllocator> getChildAllocators();
+
+ /**
+ * Create an allocation reservation. A reservation is a way of building up
+ * a request for a buffer whose size is not known in advance. See
+ *
+ * @return the newly created reservation
+ * @see AllocationReservation
+ */
+ AllocationReservation newReservation();
+
+ /**
+ * Get a reference to the empty buffer associated with this allocator. Empty buffers are
+ * special because we don't
+ * worry about them leaking or managing reference counts on them since they don't actually
+ * point to any memory.
+ *
+ * @return the empty buffer
+ */
+ ArrowBuf getEmpty();
+
+ /**
+ * Return the name of this allocator. This is a human readable name that can help debugging.
+ * Typically provides
+ * coordinates about where this allocator was created
+ *
+ * @return the name of the allocator
+ */
+ String getName();
+
+ /**
+ * Return whether or not this allocator (or one if its parents) is over its limits. In the case
+ * that an allocator is
+ * over its limit, all consumers of that allocator should aggressively try to address the
+ * overlimit situation.
+ *
+ * @return whether or not this allocator (or one if its parents) is over its limits
+ */
+ boolean isOverLimit();
+
+ /**
+ * Return a verbose string describing this allocator. If in DEBUG mode, this will also include
+ * relevant stacktraces
+ * and historical logs for underlying objects
+ *
+ * @return A very verbose description of the allocator hierarchy.
+ */
+ String toVerboseString();
+
+ /**
+ * Asserts (using java assertions) that the provided allocator is currently open. If assertions
+ * are disabled, this is
+ * a no-op.
+ */
+ void assertOpen();
+
+ /**
+ * Gets the rounding policy of the allocator.
+ */
+ default RoundingPolicy getRoundingPolicy() {
+ return DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java
new file mode 100644
index 000000000..48b3e183d
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java
@@ -0,0 +1,525 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.IdentityHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.HistoricalLog;
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * The reference manager that binds an {@link AllocationManager} to
+ * {@link BufferAllocator} and a set of {@link ArrowBuf}. The set of
+ * ArrowBufs managed by this reference manager share a common
+ * fate (same reference count).
+ */
+public class BufferLedger implements ValueWithKeyIncluded<BufferAllocator>, ReferenceManager {
+ private final IdentityHashMap<ArrowBuf, Object> buffers =
+ BaseAllocator.DEBUG ? new IdentityHashMap<>() : null;
+ private static final AtomicLong LEDGER_ID_GENERATOR = new AtomicLong(0);
+ // unique ID assigned to each ledger
+ private final long ledgerId = LEDGER_ID_GENERATOR.incrementAndGet();
+ private final AtomicInteger bufRefCnt = new AtomicInteger(0); // start at zero so we can
+ // manage request for retain
+ // correctly
+ private final long lCreationTime = System.nanoTime();
+ private final BufferAllocator allocator;
+ private final AllocationManager allocationManager;
+ private final HistoricalLog historicalLog =
+ BaseAllocator.DEBUG ? new HistoricalLog(BaseAllocator.DEBUG_LOG_LENGTH,
+ "BufferLedger[%d]", 1) : null;
+ private volatile long lDestructionTime = 0;
+
+ BufferLedger(final BufferAllocator allocator, final AllocationManager allocationManager) {
+ this.allocator = allocator;
+ this.allocationManager = allocationManager;
+ }
+
+ boolean isOwningLedger() {
+ return this == allocationManager.getOwningLedger();
+ }
+
+ public BufferAllocator getKey() {
+ return allocator;
+ }
+
+ /**
+ * Get the buffer allocator associated with this reference manager.
+ * @return buffer allocator
+ */
+ @Override
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ /**
+ * Get this ledger's reference count.
+ * @return reference count
+ */
+ @Override
+ public int getRefCount() {
+ return bufRefCnt.get();
+ }
+
+ /**
+ * Increment the ledger's reference count for the associated
+ * underlying memory chunk. All ArrowBufs managed by this ledger
+ * will share the ref count.
+ */
+ void increment() {
+ bufRefCnt.incrementAndGet();
+ }
+
+ /**
+ * Decrement the ledger's reference count by 1 for the associated underlying
+ * memory chunk. If the reference count drops to 0, it implies that
+ * no ArrowBufs managed by this reference manager need access to the memory
+ * chunk. In that case, the ledger should inform the allocation manager
+ * about releasing its ownership for the chunk. Whether or not the memory
+ * chunk will be released is something that {@link AllocationManager} will
+ * decide since tracks the usage of memory chunk across multiple reference
+ * managers and allocators.
+ * @return true if the new ref count has dropped to 0, false otherwise
+ */
+ @Override
+ public boolean release() {
+ return release(1);
+ }
+
+ /**
+ * Decrement the ledger's reference count for the associated underlying
+ * memory chunk. If the reference count drops to 0, it implies that
+ * no ArrowBufs managed by this reference manager need access to the memory
+ * chunk. In that case, the ledger should inform the allocation manager
+ * about releasing its ownership for the chunk. Whether or not the memory
+ * chunk will be released is something that {@link AllocationManager} will
+ * decide since tracks the usage of memory chunk across multiple reference
+ * managers and allocators.
+ * @param decrement amount to decrease the reference count by
+ * @return true if the new ref count has dropped to 0, false otherwise
+ */
+ @Override
+ public boolean release(int decrement) {
+ Preconditions.checkState(decrement >= 1,
+ "ref count decrement should be greater than or equal to 1");
+ // decrement the ref count
+ final int refCnt = decrement(decrement);
+ if (BaseAllocator.DEBUG) {
+ historicalLog.recordEvent("release(%d). original value: %d",
+ decrement, refCnt + decrement);
+ }
+ // the new ref count should be >= 0
+ Preconditions.checkState(refCnt >= 0, "RefCnt has gone negative");
+ return refCnt == 0;
+ }
+
+ /**
+ * Decrement the ledger's reference count for the associated underlying
+ * memory chunk. If the reference count drops to 0, it implies that
+ * no ArrowBufs managed by this reference manager need access to the memory
+ * chunk. In that case, the ledger should inform the allocation manager
+ * about releasing its ownership for the chunk. Whether or not the memory
+ * chunk will be released is something that {@link AllocationManager} will
+ * decide since tracks the usage of memory chunk across multiple reference
+ * managers and allocators.
+ *
+ * @param decrement amount to decrease the reference count by
+ * @return the new reference count
+ */
+ private int decrement(int decrement) {
+ allocator.assertOpen();
+ final int outcome;
+ synchronized (allocationManager) {
+ outcome = bufRefCnt.addAndGet(-decrement);
+ if (outcome == 0) {
+ lDestructionTime = System.nanoTime();
+ // refcount of this reference manager has dropped to 0
+ // inform the allocation manager that this reference manager
+ // no longer holds references to underlying memory
+ allocationManager.release(this);
+ }
+ }
+ return outcome;
+ }
+
+ /**
+ * Increment the ledger's reference count for associated
+ * underlying memory chunk by 1.
+ */
+ @Override
+ public void retain() {
+ retain(1);
+ }
+
+ /**
+ * Increment the ledger's reference count for associated
+ * underlying memory chunk by the given amount.
+ *
+ * @param increment amount to increase the reference count by
+ */
+ @Override
+ public void retain(int increment) {
+ Preconditions.checkArgument(increment > 0, "retain(%s) argument is not positive", increment);
+ if (BaseAllocator.DEBUG) {
+ historicalLog.recordEvent("retain(%d)", increment);
+ }
+ final int originalReferenceCount = bufRefCnt.getAndAdd(increment);
+ Preconditions.checkArgument(originalReferenceCount > 0);
+ }
+
+ /**
+ * Derive a new ArrowBuf from a given source ArrowBuf. The new derived
+ * ArrowBuf will share the same reference count as rest of the ArrowBufs
+ * associated with this ledger. This operation is typically used for
+ * slicing -- creating new ArrowBufs from a compound ArrowBuf starting at
+ * a particular index in the underlying memory and having access to a
+ * particular length (in bytes) of data in memory chunk.
+ * <p>
+ * This method is also used as a helper for transferring ownership and retain to target
+ * allocator.
+ * </p>
+ * @param sourceBuffer source ArrowBuf
+ * @param index index (relative to source ArrowBuf) new ArrowBuf should be
+ * derived from
+ * @param length length (bytes) of data in underlying memory that derived buffer will
+ * have access to in underlying memory
+ * @return derived buffer
+ */
+ @Override
+ public ArrowBuf deriveBuffer(final ArrowBuf sourceBuffer, long index, long length) {
+ /*
+ * Usage type 1 for deriveBuffer():
+ * Used for slicing where index represents a relative index in the source ArrowBuf
+ * as the slice start point. This is why we need to add the source buffer offset
+ * to compute the start virtual address of derived buffer within the
+ * underlying chunk.
+ *
+ * Usage type 2 for deriveBuffer():
+ * Used for retain(target allocator) and transferOwnership(target allocator)
+ * where index is 0 since these operations simply create a new ArrowBuf associated
+ * with another combination of allocator buffer ledger for the same underlying memory
+ */
+
+ // the memory address stored inside ArrowBuf is its starting virtual
+ // address in the underlying memory chunk from the point it has
+ // access. so it is already accounting for the offset of the source buffer
+ // we can simply add the index to get the starting address of new buffer.
+ final long derivedBufferAddress = sourceBuffer.memoryAddress() + index;
+
+ // create new ArrowBuf
+ final ArrowBuf derivedBuf = new ArrowBuf(
+ this,
+ null,
+ length, // length (in bytes) in the underlying memory chunk for this new ArrowBuf
+ derivedBufferAddress // starting byte address in the underlying memory for this new ArrowBuf
+ );
+
+ // logging
+ if (BaseAllocator.DEBUG) {
+ historicalLog.recordEvent(
+ "ArrowBuf(BufferLedger, BufferAllocator[%s], " +
+ "UnsafeDirectLittleEndian[identityHashCode == " +
+ "%d](%s)) => ledger hc == %d",
+ allocator.getName(), System.identityHashCode(derivedBuf), derivedBuf.toString(),
+ System.identityHashCode(this));
+
+ synchronized (buffers) {
+ buffers.put(derivedBuf, null);
+ }
+ }
+
+ return derivedBuf;
+ }
+
+ /**
+ * Used by an allocator to create a new ArrowBuf. This is provided
+ * as a helper method for the allocator when it allocates a new memory chunk
+ * using a new instance of allocation manager and creates a new reference manager
+ * too.
+ *
+ * @param length The length in bytes that this ArrowBuf will provide access to.
+ * @param manager An optional BufferManager argument that can be used to manage expansion of
+ * this ArrowBuf
+ * @return A new ArrowBuf that shares references with all ArrowBufs associated
+ * with this BufferLedger
+ */
+ ArrowBuf newArrowBuf(final long length, final BufferManager manager) {
+ allocator.assertOpen();
+
+ // the start virtual address of the ArrowBuf will be same as address of memory chunk
+ final long startAddress = allocationManager.memoryAddress();
+
+ // create ArrowBuf
+ final ArrowBuf buf = new ArrowBuf(this, manager, length, startAddress);
+
+ // logging
+ if (BaseAllocator.DEBUG) {
+ historicalLog.recordEvent(
+ "ArrowBuf(BufferLedger, BufferAllocator[%s], " +
+ "UnsafeDirectLittleEndian[identityHashCode == " + "%d](%s)) => ledger hc == %d",
+ allocator.getName(), System.identityHashCode(buf), buf.toString(),
+ System.identityHashCode(this));
+
+ synchronized (buffers) {
+ buffers.put(buf, null);
+ }
+ }
+
+ return buf;
+ }
+
+ /**
+ * Create a new ArrowBuf that is associated with an alternative allocator for the purposes of
+ * memory ownership and accounting. This has no impact on the reference counting for the current
+ * ArrowBuf except in the situation where the passed in Allocator is the same as the current buffer.
+ * <p>
+ * This operation has no impact on the reference count of this ArrowBuf. The newly created
+ * ArrowBuf with either have a reference count of 1 (in the case that this is the first time this
+ * memory is being associated with the target allocator or in other words allocation manager currently
+ * doesn't hold a mapping for the target allocator) or the current value of the reference count for
+ * the target allocator-reference manager combination + 1 in the case that the provided allocator
+ * already had an association to this underlying memory.
+ * </p>
+ *
+ * @param srcBuffer source ArrowBuf
+ * @param target The target allocator to create an association with.
+ * @return A new ArrowBuf which shares the same underlying memory as the provided ArrowBuf.
+ */
+ @Override
+ public ArrowBuf retain(final ArrowBuf srcBuffer, BufferAllocator target) {
+
+ if (BaseAllocator.DEBUG) {
+ historicalLog.recordEvent("retain(%s)", target.getName());
+ }
+
+ // the call to associate will return the corresponding reference manager (buffer ledger) for
+ // the target allocator. if the allocation manager didn't already have a mapping
+ // for the target allocator, it will create one and return the new reference manager with a
+ // reference count of 1. Thus the newly created buffer in this case will have a ref count of 1.
+ // alternatively, if there was already a mapping for <buffer allocator, ref manager> in
+ // allocation manager, the ref count of the new buffer will be targetrefmanager.refcount() + 1
+ // and this will be true for all the existing buffers currently managed by targetrefmanager
+ final BufferLedger targetRefManager = allocationManager.associate(target);
+ // create a new ArrowBuf to associate with new allocator and target ref manager
+ final long targetBufLength = srcBuffer.capacity();
+ ArrowBuf targetArrowBuf = targetRefManager.deriveBuffer(srcBuffer, 0, targetBufLength);
+ targetArrowBuf.readerIndex(srcBuffer.readerIndex());
+ targetArrowBuf.writerIndex(srcBuffer.writerIndex());
+ return targetArrowBuf;
+ }
+
+ /**
+ * Transfer any balance the current ledger has to the target ledger. In the case
+ * that the current ledger holds no memory, no transfer is made to the new ledger.
+ *
+ * @param targetReferenceManager The ledger to transfer ownership account to.
+ * @return Whether transfer fit within target ledgers limits.
+ */
+ boolean transferBalance(final ReferenceManager targetReferenceManager) {
+ Preconditions.checkArgument(targetReferenceManager != null,
+ "Expecting valid target reference manager");
+ final BufferAllocator targetAllocator = targetReferenceManager.getAllocator();
+ Preconditions.checkArgument(allocator.getRoot() == targetAllocator.getRoot(),
+ "You can only transfer between two allocators that share the same root.");
+
+ allocator.assertOpen();
+ targetReferenceManager.getAllocator().assertOpen();
+
+ // if we're transferring to ourself, just return.
+ if (targetReferenceManager == this) {
+ return true;
+ }
+
+ // since two balance transfers out from the allocation manager could cause incorrect
+ // accounting, we need to ensure
+ // that this won't happen by synchronizing on the allocation manager instance.
+ synchronized (allocationManager) {
+ if (allocationManager.getOwningLedger() != this) {
+ // since the calling reference manager is not the owning
+ // reference manager for the underlying memory, transfer is
+ // a NO-OP
+ return true;
+ }
+
+ if (BaseAllocator.DEBUG) {
+ this.historicalLog.recordEvent("transferBalance(%s)",
+ targetReferenceManager.getAllocator().getName());
+ }
+
+ boolean overlimit = targetAllocator.forceAllocate(allocationManager.getSize());
+ allocator.releaseBytes(allocationManager.getSize());
+ // since the transfer can only happen from the owning reference manager,
+ // we need to set the target ref manager as the new owning ref manager
+ // for the chunk of memory in allocation manager
+ allocationManager.setOwningLedger((BufferLedger) targetReferenceManager);
+ return overlimit;
+ }
+ }
+
+ /**
+ * Transfer the memory accounting ownership of this ArrowBuf to another allocator.
+ * This will generate a new ArrowBuf that carries an association with the underlying memory
+ * of this ArrowBuf. If this ArrowBuf is connected to the owning BufferLedger of this memory,
+ * that memory ownership/accounting will be transferred to the target allocator. If this
+ * ArrowBuf does not currently own the memory underlying it (and is only associated with it),
+ * this does not transfer any ownership to the newly created ArrowBuf.
+ * <p>
+ * This operation has no impact on the reference count of this ArrowBuf. The newly created
+ * ArrowBuf with either have a reference count of 1 (in the case that this is the first time
+ * this memory is being associated with the new allocator) or the current value of the reference
+ * count for the other AllocationManager/BufferLedger combination + 1 in the case that the provided
+ * allocator already had an association to this underlying memory.
+ * </p>
+ * <p>
+ * Transfers will always succeed, even if that puts the other allocator into an overlimit
+ * situation. This is possible due to the fact that the original owning allocator may have
+ * allocated this memory out of a local reservation whereas the target allocator may need to
+ * allocate new memory from a parent or RootAllocator. This operation is done n a mostly-lockless
+ * but consistent manner. As such, the overlimit==true situation could occur slightly prematurely
+ * to an actual overlimit==true condition. This is simply conservative behavior which means we may
+ * return overlimit slightly sooner than is necessary.
+ * </p>
+ *
+ * @param target The allocator to transfer ownership to.
+ * @return A new transfer result with the impact of the transfer (whether it was overlimit) as
+ * well as the newly created ArrowBuf.
+ */
+ @Override
+ public TransferResult transferOwnership(final ArrowBuf srcBuffer, final BufferAllocator target) {
+ // the call to associate will return the corresponding reference manager (buffer ledger) for
+ // the target allocator. if the allocation manager didn't already have a mapping
+ // for the target allocator, it will create one and return the new reference manager with a
+ // reference count of 1. Thus the newly created buffer in this case will have a ref count of 1.
+ // alternatively, if there was already a mapping for <buffer allocator, ref manager> in
+ // allocation manager, the ref count of the new buffer will be targetrefmanager.refcount() + 1
+ // and this will be true for all the existing buffers currently managed by targetrefmanager
+ final BufferLedger targetRefManager = allocationManager.associate(target);
+ // create a new ArrowBuf to associate with new allocator and target ref manager
+ final long targetBufLength = srcBuffer.capacity();
+ final ArrowBuf targetArrowBuf = targetRefManager.deriveBuffer(srcBuffer, 0, targetBufLength);
+ targetArrowBuf.readerIndex(srcBuffer.readerIndex());
+ targetArrowBuf.writerIndex(srcBuffer.writerIndex());
+ final boolean allocationFit = transferBalance(targetRefManager);
+ return new TransferResult(allocationFit, targetArrowBuf);
+ }
+
+ /**
+ * The outcome of a Transfer.
+ */
+ public class TransferResult implements OwnershipTransferResult {
+
+ // Whether this transfer fit within the target allocator's capacity.
+ final boolean allocationFit;
+
+ // The newly created buffer associated with the target allocator
+ public final ArrowBuf buffer;
+
+ private TransferResult(boolean allocationFit, ArrowBuf buffer) {
+ this.allocationFit = allocationFit;
+ this.buffer = buffer;
+ }
+
+ @Override
+ public ArrowBuf getTransferredBuffer() {
+ return buffer;
+ }
+
+ @Override
+ public boolean getAllocationFit() {
+ return allocationFit;
+ }
+ }
+
+ /**
+ * Total size (in bytes) of memory underlying this reference manager.
+ * @return Size (in bytes) of the memory chunk
+ */
+ @Override
+ public long getSize() {
+ return allocationManager.getSize();
+ }
+
+ /**
+ * How much memory is accounted for by this ledger. This is either getSize()
+ * if this is the owning ledger for the memory or zero in the case that this
+ * is not the owning ledger associated with this memory.
+ * @return Amount of accounted(owned) memory associated with this ledger.
+ */
+ @Override
+ public long getAccountedSize() {
+ synchronized (allocationManager) {
+ if (allocationManager.getOwningLedger() == this) {
+ return allocationManager.getSize();
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ /**
+ * Print the current ledger state to the provided StringBuilder.
+ *
+ * @param sb The StringBuilder to populate.
+ * @param indent The level of indentation to position the data.
+ * @param verbosity The level of verbosity to print.
+ */
+ void print(StringBuilder sb, int indent, BaseAllocator.Verbosity verbosity) {
+ CommonUtil.indent(sb, indent)
+ .append("ledger[")
+ .append(ledgerId)
+ .append("] allocator: ")
+ .append(allocator.getName())
+ .append("), isOwning: ")
+ .append(", size: ")
+ .append(", references: ")
+ .append(bufRefCnt.get())
+ .append(", life: ")
+ .append(lCreationTime)
+ .append("..")
+ .append(lDestructionTime)
+ .append(", allocatorManager: [")
+ .append(", life: ");
+
+ if (!BaseAllocator.DEBUG) {
+ sb.append("]\n");
+ } else {
+ synchronized (buffers) {
+ sb.append("] holds ")
+ .append(buffers.size())
+ .append(" buffers. \n");
+ for (ArrowBuf buf : buffers.keySet()) {
+ buf.print(sb, indent + 2, verbosity);
+ sb.append('\n');
+ }
+ }
+ }
+ }
+
+ /**
+ * Get the {@link AllocationManager} used by this BufferLedger.
+ *
+ * @return The AllocationManager used by this BufferLedger.
+ */
+ public AllocationManager getAllocationManager() {
+ return allocationManager;
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java
new file mode 100644
index 000000000..6b622e719
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * Manages a list of {@link ArrowBuf}s that can be reallocated as needed. Upon
+ * re-allocation the old buffer will be freed. Managing a list of these buffers
+ * prevents some parts of the system from needing to define a correct location
+ * to place the final call to free them.
+ */
+public interface BufferManager extends AutoCloseable {
+
+ /**
+ * Replace an old buffer with a new version at least of the provided size. Does not copy data.
+ *
+ * @param old Old Buffer that the user is no longer going to use.
+ * @param newSize Size of new replacement buffer.
+ * @return A new version of the buffer.
+ */
+ ArrowBuf replace(ArrowBuf old, long newSize);
+
+ /**
+ * Get a managed buffer of indeterminate size.
+ *
+ * @return A buffer.
+ */
+ ArrowBuf getManagedBuffer();
+
+ /**
+ * Get a managed buffer of at least a certain size.
+ *
+ * @param size The desired size
+ * @return A buffer
+ */
+ ArrowBuf getManagedBuffer(long size);
+
+ void close();
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java
new file mode 100644
index 000000000..79b825aa2
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.Enumeration;
+import java.util.LinkedHashSet;
+import java.util.Set;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Static method to ensure we have a RootAllocator on the classpath and report which one is used.
+ */
+final class CheckAllocator {
+ private static final Logger logger = LoggerFactory.getLogger(CheckAllocator.class);
+ private static final String ALLOCATOR_PATH = "org/apache/arrow/memory/DefaultAllocationManagerFactory.class";
+
+ private CheckAllocator() {
+
+ }
+
+ static String check() {
+ Set<URL> urls = scanClasspath();
+ URL rootAllocator = assertOnlyOne(urls);
+ reportResult(rootAllocator);
+ return "org.apache.arrow.memory.DefaultAllocationManagerFactory";
+ }
+
+
+ private static Set<URL> scanClasspath() {
+ // LinkedHashSet appropriate here because it preserves insertion order
+ // during iteration
+ Set<URL> allocatorPathSet = new LinkedHashSet<>();
+ try {
+ ClassLoader allocatorClassLoader = CheckAllocator.class.getClassLoader();
+ Enumeration<URL> paths;
+ if (allocatorClassLoader == null) {
+ paths = ClassLoader.getSystemResources(ALLOCATOR_PATH);
+ } else {
+ paths = allocatorClassLoader.getResources(ALLOCATOR_PATH);
+ }
+ while (paths.hasMoreElements()) {
+ URL path = paths.nextElement();
+ allocatorPathSet.add(path);
+ }
+ } catch (IOException ioe) {
+ logger.error("Error getting resources from path", ioe);
+ }
+ return allocatorPathSet;
+ }
+
+ private static void reportResult(URL rootAllocator) {
+ String path = rootAllocator.getPath();
+ String subPath = path.substring(path.indexOf("memory"));
+ logger.info("Using DefaultAllocationManager at {}", subPath);
+ }
+
+ private static URL assertOnlyOne(Set<URL> urls) {
+ if (urls.size() > 1) {
+ logger.warn("More than one DefaultAllocationManager on classpath. Choosing first found");
+ }
+ if (urls.isEmpty()) {
+ throw new RuntimeException("No DefaultAllocationManager found on classpath. Can't allocate Arrow buffers." +
+ " Please consider adding arrow-memory-netty or arrow-memory-unsafe as a dependency.");
+ }
+ return urls.iterator().next();
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java
new file mode 100644
index 000000000..67156f89d
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+
+/**
+ * Child allocator class. Only slightly different from the {@see RootAllocator},
+ * in that these can't be created directly, but must be obtained from
+ * {@see BufferAllocator#newChildAllocator(AllocatorOwner, long, long, int)}.
+ *
+ * <p>Child allocators can only be created by the root, or other children, so
+ * this class is package private.</p>
+ */
+class ChildAllocator extends BaseAllocator {
+
+ /**
+ * Constructor.
+ *
+ * @param parentAllocator parent allocator -- the one creating this child
+ * @param name the name of this child allocator
+ * @param config configuration of this child allocator
+ */
+ ChildAllocator(
+ BaseAllocator parentAllocator,
+ String name,
+ Config config) {
+ super(parentAllocator, name, config);
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerOption.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerOption.java
new file mode 100644
index 000000000..15120c252
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerOption.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.lang.reflect.Field;
+
+/**
+ * A class for choosing the default allocation manager.
+ */
+public class DefaultAllocationManagerOption {
+
+ /**
+ * The environmental variable to set the default allocation manager type.
+ */
+ public static final String ALLOCATION_MANAGER_TYPE_ENV_NAME = "ARROW_ALLOCATION_MANAGER_TYPE";
+
+ /**
+ * The system property to set the default allocation manager type.
+ */
+ public static final String ALLOCATION_MANAGER_TYPE_PROPERTY_NAME = "arrow.allocation.manager.type";
+
+ static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(DefaultAllocationManagerOption.class);
+
+ /**
+ * The default allocation manager factory.
+ */
+ private static AllocationManager.Factory DEFAULT_ALLOCATION_MANAGER_FACTORY = null;
+
+ /**
+ * The allocation manager type.
+ */
+ public enum AllocationManagerType {
+ /**
+ * Netty based allocation manager.
+ */
+ Netty,
+
+ /**
+ * Unsafe based allocation manager.
+ */
+ Unsafe,
+
+ /**
+ * Unknown type.
+ */
+ Unknown,
+ }
+
+ static AllocationManagerType getDefaultAllocationManagerType() {
+ AllocationManagerType ret = AllocationManagerType.Unknown;
+
+ try {
+ String envValue = System.getenv(ALLOCATION_MANAGER_TYPE_ENV_NAME);
+ ret = AllocationManagerType.valueOf(envValue);
+ } catch (IllegalArgumentException | NullPointerException e) {
+ // ignore the exception, and make the allocation manager type remain unchanged
+ }
+
+ // system property takes precedence
+ try {
+ String propValue = System.getProperty(ALLOCATION_MANAGER_TYPE_PROPERTY_NAME);
+ ret = AllocationManagerType.valueOf(propValue);
+ } catch (IllegalArgumentException | NullPointerException e) {
+ // ignore the exception, and make the allocation manager type remain unchanged
+ }
+ return ret;
+ }
+
+ static AllocationManager.Factory getDefaultAllocationManagerFactory() {
+ if (DEFAULT_ALLOCATION_MANAGER_FACTORY != null) {
+ return DEFAULT_ALLOCATION_MANAGER_FACTORY;
+ }
+ AllocationManagerType type = getDefaultAllocationManagerType();
+ switch (type) {
+ case Netty:
+ DEFAULT_ALLOCATION_MANAGER_FACTORY = getNettyFactory();
+ break;
+ case Unsafe:
+ DEFAULT_ALLOCATION_MANAGER_FACTORY = getUnsafeFactory();
+ break;
+ case Unknown:
+ LOGGER.info("allocation manager type not specified, using netty as the default type");
+ DEFAULT_ALLOCATION_MANAGER_FACTORY = getFactory(CheckAllocator.check());
+ break;
+ default:
+ throw new IllegalStateException("Unknown allocation manager type: " + type);
+ }
+ return DEFAULT_ALLOCATION_MANAGER_FACTORY;
+ }
+
+ private static AllocationManager.Factory getFactory(String clazzName) {
+ try {
+ Field field = Class.forName(clazzName).getDeclaredField("FACTORY");
+ field.setAccessible(true);
+ return (AllocationManager.Factory) field.get(null);
+ } catch (Exception e) {
+ throw new RuntimeException("Unable to instantiate Allocation Manager for " + clazzName, e);
+ }
+ }
+
+ private static AllocationManager.Factory getUnsafeFactory() {
+ try {
+ return getFactory("org.apache.arrow.memory.UnsafeAllocationManager");
+ } catch (RuntimeException e) {
+ throw new RuntimeException("Please add arrow-memory-unsafe to your classpath," +
+ " No DefaultAllocationManager found to instantiate an UnsafeAllocationManager", e);
+ }
+ }
+
+ private static AllocationManager.Factory getNettyFactory() {
+ try {
+ return getFactory("org.apache.arrow.memory.NettyAllocationManager");
+ } catch (RuntimeException e) {
+ throw new RuntimeException("Please add arrow-memory-netty to your classpath," +
+ " No DefaultAllocationManager found to instantiate an NettyAllocationManager", e);
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java
new file mode 100644
index 000000000..edfa82392
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java
@@ -0,0 +1,336 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.util.VisibleForTesting;
+
+/**
+ * Highly specialized IdentityHashMap that implements only partial
+ * Map APIs.
+ * It incurs low initial cost (just two elements by default).
+ * It assumes Value includes the Key - Implements @ValueWithKeyIncluded iface
+ * that provides "getKey" method.
+ *
+ * @param <K> Key type
+ * @param <V> Value type
+ */
+public class LowCostIdentityHashMap<K, V extends ValueWithKeyIncluded<K>> {
+
+ /*
+ * The internal data structure to hold values.
+ */
+ private Object[] elementData;
+
+ /* Actual number of values. */
+ private int size;
+
+ /*
+ * maximum number of elements that can be put in this map before having to
+ * rehash.
+ */
+ private int threshold;
+
+ private static final int DEFAULT_MIN_SIZE = 1;
+
+ /* Default load factor of 0.75; */
+ private static final int LOAD_FACTOR = 7500;
+
+ /**
+ * Creates a Map with default expected maximum size.
+ */
+ public LowCostIdentityHashMap() {
+ this(DEFAULT_MIN_SIZE);
+ }
+
+ /**
+ * Creates a Map with the specified maximum size parameter.
+ *
+ * @param maxSize
+ * The estimated maximum number of entries that will be put in
+ * this map.
+ */
+ public LowCostIdentityHashMap(int maxSize) {
+ if (maxSize >= 0) {
+ this.size = 0;
+ threshold = getThreshold(maxSize);
+ elementData = newElementArray(computeElementArraySize());
+ } else {
+ throw new IllegalArgumentException();
+ }
+ }
+
+ private int getThreshold(int maxSize) {
+ // assign the threshold to maxSize initially, this will change to a
+ // higher value if rehashing occurs.
+ return maxSize > 2 ? maxSize : 2;
+ }
+
+ private int computeElementArraySize() {
+ int arraySize = (int) (((long) threshold * 10000) / LOAD_FACTOR);
+ // ensure arraySize is positive, the above cast from long to int type
+ // leads to overflow and negative arraySize if threshold is too big
+ return arraySize < 0 ? -arraySize : arraySize;
+ }
+
+ /**
+ * Create a new element array.
+ *
+ * @param s
+ * the number of elements
+ * @return Reference to the element array
+ */
+ private Object[] newElementArray(int s) {
+ return new Object[s];
+ }
+
+ /**
+ * Removes all elements from this map, leaving it empty.
+ *
+ * @see #isEmpty()
+ * @see #size()
+ */
+ public void clear() {
+ size = 0;
+ for (int i = 0; i < elementData.length; i++) {
+ elementData[i] = null;
+ }
+ }
+
+ /**
+ * Returns whether this map contains the specified key.
+ *
+ * @param key
+ * the key to search for.
+ * @return {@code true} if this map contains the specified key,
+ * {@code false} otherwise.
+ */
+ public boolean containsKey(K key) {
+ Preconditions.checkNotNull(key);
+
+ int index = findIndex(key, elementData);
+ return (elementData[index] == null) ? false : ((V) elementData[index]).getKey() == key;
+ }
+
+ /**
+ * Returns whether this map contains the specified value.
+ *
+ * @param value
+ * the value to search for.
+ * @return {@code true} if this map contains the specified value,
+ * {@code false} otherwise.
+ */
+ public boolean containsValue(V value) {
+ Preconditions.checkNotNull(value);
+
+ for (int i = 0; i < elementData.length; i++) {
+ if (elementData[i] == value) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Returns the value of the mapping with the specified key.
+ *
+ * @param key the key.
+ * @return the value of the mapping with the specified key.
+ */
+ public V get(K key) {
+ Preconditions.checkNotNull(key);
+
+ int index = findIndex(key, elementData);
+
+ return (elementData[index] == null) ? null :
+ (((V) elementData[index]).getKey() == key) ? (V) elementData[index] : null;
+ }
+
+ /**
+ * Returns the index where the key is found at, or the index of the next
+ * empty spot if the key is not found in this table.
+ */
+ @VisibleForTesting
+ int findIndex(Object key, Object[] array) {
+ int length = array.length;
+ int index = getModuloHash(key, length);
+ int last = (index + length - 1) % length;
+ while (index != last) {
+ if ((array[index] == null) || ((V) array[index]).getKey() == key) {
+ /*
+ * Found the key, or the next empty spot (which means key is not
+ * in the table)
+ */
+ break;
+ }
+ index = (index + 1) % length;
+ }
+ return index;
+ }
+
+ @VisibleForTesting
+ static int getModuloHash(Object key, int length) {
+ return ((System.identityHashCode(key) & 0x7FFFFFFF) % length);
+ }
+
+ /**
+ * Maps the specified key to the specified value.
+ *
+ * @param value the value.
+ * @return the value of any previous mapping with the specified key or
+ * {@code null} if there was no such mapping.
+ */
+ public V put(V value) {
+ Preconditions.checkNotNull(value);
+ K key = value.getKey();
+ Preconditions.checkNotNull(key);
+
+ int index = findIndex(key, elementData);
+
+ // if the key doesn't exist in the table
+ if (elementData[index] == null || ((V) elementData[index]).getKey() != key) {
+ if (++size > threshold) {
+ rehash();
+ index = findIndex(key, elementData);
+ }
+
+ // insert the key and assign the value to null initially
+ elementData[index] = null;
+ }
+
+ // insert value to where it needs to go, return the old value
+ Object result = elementData[index];
+ elementData[index] = value;
+
+ return (V) result;
+ }
+
+ @VisibleForTesting
+ void rehash() {
+ int newlength = elementData.length * 15 / 10;
+ if (newlength == 0) {
+ newlength = 1;
+ }
+ Object[] newData = newElementArray(newlength);
+ for (int i = 0; i < elementData.length; i++) {
+ Object key = (elementData[i] == null) ? null : ((V) elementData[i]).getKey();
+ if (key != null) {
+ // if not empty
+ int index = findIndex(key, newData);
+ newData[index] = elementData[i];
+ }
+ }
+ elementData = newData;
+ computeMaxSize();
+ }
+
+ private void computeMaxSize() {
+ threshold = (int) ((long) (elementData.length) * LOAD_FACTOR / 10000);
+ }
+
+ /**
+ * Removes the mapping with the specified key from this map.
+ *
+ * @param key the key of the mapping to remove.
+ * @return the value of the removed mapping, or {@code null} if no mapping
+ * for the specified key was found.
+ */
+ public V remove(K key) {
+ Preconditions.checkNotNull(key);
+
+ boolean hashedOk;
+ int index;
+ int next;
+ int hash;
+ Object result;
+ Object object;
+ index = next = findIndex(key, elementData);
+
+ if (elementData[index] == null || ((V) elementData[index]).getKey() != key) {
+ return null;
+ }
+
+ // store the value for this key
+ result = elementData[index];
+ // clear value to allow movement of the rest of the elements
+ elementData[index] = null;
+ size--;
+
+ // shift the following elements up if needed
+ // until we reach an empty spot
+ int length = elementData.length;
+ while (true) {
+ next = (next + 1) % length;
+ object = elementData[next];
+ if (object == null) {
+ break;
+ }
+
+ hash = getModuloHash(((V) object).getKey(), length);
+ hashedOk = hash > index;
+ if (next < index) {
+ hashedOk = hashedOk || (hash <= next);
+ } else {
+ hashedOk = hashedOk && (hash <= next);
+ }
+ if (!hashedOk) {
+ elementData[index] = object;
+ index = next;
+ elementData[index] = null;
+ }
+ }
+
+ return (V) result;
+ }
+
+
+
+ /**
+ * Returns whether this Map has no elements.
+ *
+ * @return {@code true} if this Map has no elements,
+ * {@code false} otherwise.
+ * @see #size()
+ */
+ public boolean isEmpty() {
+ return size == 0;
+ }
+
+ /**
+ * Returns the number of mappings in this Map.
+ *
+ * @return the number of mappings in this Map.
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Special API to return next value - substitute of regular Map.values.iterator().next().
+ *
+ * @return next available value or null if none available
+ */
+ public V getNextValue() {
+ for (int i = 0; i < elementData.length; i++) {
+ if (elementData[i] != null) {
+ return (V) elementData[i];
+ }
+ }
+ return null;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OutOfMemoryException.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OutOfMemoryException.java
new file mode 100644
index 000000000..841ffcd8f
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OutOfMemoryException.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.Optional;
+
+/**
+ * Indicates memory could not be allocated for Arrow buffers.
+ *
+ * <p>This is different from {@linkplain OutOfMemoryError} which indicates the JVM
+ * is out of memory. This error indicates that static limit of one of Arrow's
+ * allocators (e.g. {@linkplain BaseAllocator}) has been exceeded.
+ */
+public class OutOfMemoryException extends RuntimeException {
+
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(OutOfMemoryException
+ .class);
+ private static final long serialVersionUID = -6858052345185793382L;
+ private Optional<AllocationOutcomeDetails> outcomeDetails = Optional.empty();
+
+ public OutOfMemoryException() {
+ super();
+ }
+
+ public OutOfMemoryException(String message, Throwable cause, boolean enableSuppression, boolean
+ writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+
+ public OutOfMemoryException(String message, Throwable cause) {
+ super(message, cause);
+
+ }
+
+ public OutOfMemoryException(String message) {
+ super(message);
+ }
+
+ public OutOfMemoryException(String message, Optional<AllocationOutcomeDetails> details) {
+ super(message);
+ this.outcomeDetails = details;
+ }
+
+ public OutOfMemoryException(Throwable cause) {
+ super(cause);
+
+ }
+
+ public Optional<AllocationOutcomeDetails> getOutcomeDetails() {
+ return outcomeDetails;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferNOOP.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferNOOP.java
new file mode 100644
index 000000000..d4fed8448
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferNOOP.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * An {@link OwnershipTransferResult} indicating no transfer needed.
+ */
+public class OwnershipTransferNOOP implements OwnershipTransferResult {
+ private final ArrowBuf buffer;
+
+ OwnershipTransferNOOP(final ArrowBuf buf) {
+ this.buffer = buf;
+ }
+
+ @Override
+ public ArrowBuf getTransferredBuffer() {
+ return buffer;
+ }
+
+ @Override
+ public boolean getAllocationFit() {
+ return true;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferResult.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferResult.java
new file mode 100644
index 000000000..ef857d827
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferResult.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * The result of transferring an {@link ArrowBuf} between {@linkplain BufferAllocator}s.
+ */
+public interface OwnershipTransferResult {
+
+ boolean getAllocationFit();
+
+ ArrowBuf getTransferredBuffer();
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/README.md b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/README.md
new file mode 100644
index 000000000..f5f924ce8
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/README.md
@@ -0,0 +1,121 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+# Memory: Allocation, Accounting and Management
+
+The memory management package contains all the memory allocation related items that Arrow uses to manage memory.
+
+
+## Key Components
+Memory management can be broken into the following main components:
+
+- Memory chunk allocation and fragmentation management
+ - `PooledByteBufAllocatorL` - A LittleEndian clone of Netty's jemalloc implementation
+ - `UnsafeDirectLittleEndian` - A base level memory access interface
+ - `LargeBuffer` - A buffer backing implementation used when working with data larger than one Netty chunk (default to 16mb)
+- Memory limits & Accounting
+ - `Accountant` - A nestable class of lockfree memory accountors.
+- Application-level memory allocation
+ - `BufferAllocator` - The public interface application users should be leveraging
+ - `BaseAllocator` - The base implementation of memory allocation, contains the meat of our the Arrow allocator implementation
+ - `RootAllocator` - The root allocator. Typically only one created for a JVM
+ - `ChildAllocator` - A child allocator that derives from the root allocator
+- Buffer ownership and transfer capabilities
+ - `AllocationManager` - Responsible for managing the relationship between multiple allocators and a single chunk of memory
+ - `BufferLedger` - Responsible for allowing maintaining the relationship between an `AllocationManager`, a `BufferAllocator` and one or more individual `ArrowBuf`s
+- Memory access
+ - `ArrowBuf` - The facade for interacting directly with a chunk of memory.
+
+
+## Memory Management Overview
+Arrow's memory model is based on the following basic concepts:
+
+ - Memory can be allocated up to some limit. That limit could be a real limit (OS/JVM) or a locally imposed limit.
+ - Allocation operates in two phases: accounting then actual allocation. Allocation could fail at either point.
+ - Allocation failure should be recoverable. In all cases, the Allocator infrastructure should expose memory allocation failures (OS or internal limit-based) as `OutOfMemoryException`s.
+ - Any allocator can reserve memory when created. This memory shall be held such that this allocator will always be able to allocate that amount of memory.
+ - A particular application component should work to use a local allocator to understand local memory usage and better debug memory leaks.
+ - The same physical memory can be shared by multiple allocators and the allocator must provide an accounting paradigm for this purpose.
+
+## Allocator Trees
+
+Arrow provides a tree-based model for memory allocation. The RootAllocator is created first, then all allocators are created as children of that allocator. The RootAllocator is responsible for being the master bookkeeper for memory allocations. All other allocators are created as children of this tree. Each allocator can first determine whether it has enough local memory to satisfy a particular request. If not, the allocator can ask its parent for an additional memory allocation.
+
+## Reserving Memory
+
+Arrow provides two different ways to reserve memory:
+
+ - BufferAllocator accounting reservations:
+ When a new allocator (other than the `RootAllocator`) is initialized, it can set aside memory that it will keep locally for its lifetime. This is memory that will never be released back to its parent allocator until the allocator is closed.
+ - `AllocationReservation` via BufferAllocator.newReservation(): Allows a short-term preallocation strategy so that a particular subsystem can ensure future memory is available to support a particular request.
+
+## Memory Ownership, Reference Counts and Sharing
+Many BufferAllocators can reference the same piece of memory at the same time. The most common situation for this is in the case of a Broadcast Join: in this situation many downstream operators in the same Arrowbit will receive the same physical memory. Each of these operators will be operating within its own Allocator context. We therefore have multiple allocators all pointing at the same physical memory. It is the AllocationManager's responsibility to ensure that in this situation, that all memory is accurately accounted for from the Root's perspective and also to ensure that the memory is correctly released once all BufferAllocators have stopped using that memory.
+
+For simplicity of accounting, we treat that memory as being used by one of the BufferAllocators associated with the memory. When that allocator releases its claim on that memory, the memory ownership is then moved to another BufferLedger belonging to the same AllocationManager. Note that because a ArrowBuf.release() is what actually causes memory ownership transfer to occur, we always precede with ownership transfer (even if that violates an allocator limit). It is the responsibility of the application owning a particular allocator to frequently confirm whether the allocator is over its memory limit (BufferAllocator.isOverLimit()) and if so, attempt to aggressively release memory to ameliorate the situation.
+
+All ArrowBufs (direct or sliced) related to a single BufferLedger/BufferAllocator combination share the same reference count and either all will be valid or all will be invalid.
+
+## Object Hierarchy
+
+There are two main ways that someone can look at the object hierarchy for Arrow's memory management scheme. The first is a memory based perspective as below:
+
+### Memory Perspective
+<pre>
++ AllocationManager
+|
+|-- UnsignedDirectLittleEndian (One per AllocationManager)
+|
+|-+ BufferLedger 1 ==> Allocator A (owning)
+| ` - ArrowBuf 1
+|-+ BufferLedger 2 ==> Allocator B (non-owning)
+| ` - ArrowBuf 2
+|-+ BufferLedger 3 ==> Allocator C (non-owning)
+ | - ArrowBuf 3
+ | - ArrowBuf 4
+ ` - ArrowBuf 5
+</pre>
+
+In this picture, a piece of memory is owned by an allocator manager. An allocator manager is responsible for that piece of memory no matter which allocator(s) it is working with. An allocator manager will have relationships with a piece of raw memory (via its reference to UnsignedDirectLittleEndian) as well as references to each BufferAllocator it has a relationship to.
+
+### Allocator Perspective
+<pre>
++ RootAllocator
+|-+ ChildAllocator 1
+| | - ChildAllocator 1.1
+| ` ...
+|
+|-+ ChildAllocator 2
+|-+ ChildAllocator 3
+| |
+| |-+ BufferLedger 1 ==> AllocationManager 1 (owning) ==> UDLE
+| | `- ArrowBuf 1
+| `-+ BufferLedger 2 ==> AllocationManager 2 (non-owning)==> UDLE
+| `- ArrowBuf 2
+|
+|-+ BufferLedger 3 ==> AllocationManager 1 (non-owning)==> UDLE
+| ` - ArrowBuf 3
+|-+ BufferLedger 4 ==> AllocationManager 2 (owning) ==> UDLE
+ | - ArrowBuf 4
+ | - ArrowBuf 5
+ ` - ArrowBuf 6
+</pre>
+
+In this picture, a RootAllocator owns three ChildAllocators. The first ChildAllocator (ChildAllocator 1) owns a subsequent ChildAllocator. ChildAllocator has two BufferLedgers/AllocationManager references. Coincidentally, each of these AllocationManager's is also associated with the RootAllocator. In this case, one of the these AllocationManagers is owned by ChildAllocator 3 (AllocationManager 1) while the other AllocationManager (AllocationManager 2) is owned/accounted for by the RootAllocator. Note that in this scenario, ArrowBuf 1 is sharing the underlying memory as ArrowBuf 3. However the subset of that memory (e.g. through slicing) might be different. Also note that ArrowBuf 2 and ArrowBuf 4, 5 and 6 are also sharing the same underlying memory. Also note that ArrowBuf 4, 5 and 6 all share the same reference count and fate.
+
+## Debugging Issues
+The Allocator object provides a useful set of tools to better understand the status of the allocator. If in `DEBUG` mode, the allocator and supporting classes will record additional debug tracking information to better track down memory leaks and issues. To enable DEBUG mode, either enable Java assertions with `-ea` or pass the following system property to the VM when starting `-Darrow.memory.debug.allocator=true`. The BufferAllocator also provides a `BufferAllocator.toVerboseString()` which can be used in DEBUG mode to get extensive stacktrace information and events associated with various Allocator behaviors.
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java
new file mode 100644
index 000000000..00ae274b7
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * Reference Manager manages one or more ArrowBufs that share the
+ * reference count for the underlying memory chunk.
+ */
+public interface ReferenceManager {
+
+ /**
+ * Return the reference count.
+ * @return reference count
+ */
+ int getRefCount();
+
+ /**
+ * Decrement this reference manager's reference count by 1 for the associated underlying
+ * memory. If the reference count drops to 0, it implies that ArrowBufs managed by this
+ * reference manager no longer need access to the underlying memory
+ * @return true if ref count has dropped to 0, false otherwise
+ */
+ boolean release();
+
+ /**
+ * Decrement this reference manager's reference count for the associated underlying
+ * memory. If the reference count drops to 0, it implies that ArrowBufs managed by this
+ * reference manager no longer need access to the underlying memory
+ * @param decrement the count to decrease the reference count by
+ * @return the new reference count
+ */
+ boolean release(int decrement);
+
+ /**
+ * Increment this reference manager's reference count by 1 for the associated underlying
+ * memory.
+ */
+ void retain();
+
+ /**
+ * Increment this reference manager's reference count by a given amount for the
+ * associated underlying memory.
+ * @param increment the count to increase the reference count by
+ */
+ void retain(int increment);
+
+ /**
+ * Create a new ArrowBuf that is associated with an alternative allocator for the purposes of
+ * memory ownership and accounting. This has no impact on the reference counting for the current
+ * ArrowBuf except in the situation where the passed in Allocator is the same as the current buffer.
+ * This operation has no impact on the reference count of this ArrowBuf. The newly created
+ * ArrowBuf with either have a reference count of 1 (in the case that this is the first time this
+ * memory is being associated with the target allocator or in other words allocation manager currently
+ * doesn't hold a mapping for the target allocator) or the current value of the reference count for
+ * the target allocator-reference manager combination + 1 in the case that the provided allocator
+ * already had an association to this underlying memory.
+ *
+ * @param srcBuffer source ArrowBuf
+ * @param targetAllocator The target allocator to create an association with.
+ * @return A new ArrowBuf which shares the same underlying memory as this ArrowBuf.
+ */
+ ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator);
+
+ /**
+ * Derive a new ArrowBuf from a given source ArrowBuf. The new derived
+ * ArrowBuf will share the same reference count as rest of the ArrowBufs
+ * associated with this reference manager.
+ * @param sourceBuffer source ArrowBuf
+ * @param index index (relative to source ArrowBuf) new ArrowBuf should be derived from
+ * @param length length (bytes) of data in underlying memory that derived buffer will
+ * have access to in underlying memory
+ * @return derived buffer
+ */
+ ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length);
+
+ /**
+ * Transfer the memory accounting ownership of this ArrowBuf to another allocator.
+ * This will generate a new ArrowBuf that carries an association with the underlying memory
+ * for the given ArrowBuf
+ * @param sourceBuffer source ArrowBuf
+ * @param targetAllocator The target allocator to create an association with
+ * @return {@link OwnershipTransferResult} with info on transfer result and new buffer
+ */
+ OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator);
+
+ /**
+ * Get the buffer allocator associated with this reference manager.
+ * @return buffer allocator.
+ */
+ BufferAllocator getAllocator();
+
+ /**
+ * Total size (in bytes) of memory underlying this reference manager.
+ * @return Size (in bytes) of the memory chunk.
+ */
+ long getSize();
+
+ /**
+ * Get the total accounted size (in bytes).
+ * @return accounted size.
+ */
+ long getAccountedSize();
+
+ String NO_OP_ERROR_MESSAGE = "Operation not supported on NO_OP Reference Manager";
+
+ // currently used for empty ArrowBufs
+ ReferenceManager NO_OP = new ReferenceManager() {
+ @Override
+ public int getRefCount() {
+ return 1;
+ }
+
+ @Override
+ public boolean release() {
+ return false;
+ }
+
+ @Override
+ public boolean release(int decrement) {
+ return false;
+ }
+
+ @Override
+ public void retain() { }
+
+ @Override
+ public void retain(int increment) { }
+
+ @Override
+ public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) {
+ return srcBuffer;
+ }
+
+ @Override
+ public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) {
+ return sourceBuffer;
+ }
+
+ @Override
+ public OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator) {
+ return new OwnershipTransferNOOP(sourceBuffer);
+ }
+
+ @Override
+ public BufferAllocator getAllocator() {
+ return new RootAllocator(0);
+ }
+
+ @Override
+ public long getSize() {
+ return 0L;
+ }
+
+ @Override
+ public long getAccountedSize() {
+ return 0L;
+ }
+
+ };
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/RootAllocator.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/RootAllocator.java
new file mode 100644
index 000000000..89889118c
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/RootAllocator.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
+import org.apache.arrow.memory.rounding.RoundingPolicy;
+import org.apache.arrow.util.VisibleForTesting;
+
+/**
+ * A root allocator for using direct memory for Arrow Vectors/Arrays. Supports creating a
+ * tree of descendant child allocators to facilitate better instrumentation of memory
+ * allocations.
+ */
+public class RootAllocator extends BaseAllocator {
+
+ public RootAllocator() {
+ this(AllocationListener.NOOP, Long.MAX_VALUE);
+ }
+
+ public RootAllocator(final long limit) {
+ this(AllocationListener.NOOP, limit);
+ }
+
+ public RootAllocator(final AllocationListener listener, final long limit) {
+ //todo fix DefaultRoundingPolicy when using Netty
+ this(listener, limit, DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param listener the allocation listener
+ * @param limit max allocation size in bytes
+ * @param roundingPolicy the policy for rounding the buffer size
+ */
+ public RootAllocator(final AllocationListener listener, final long limit, RoundingPolicy roundingPolicy) {
+ this(configBuilder()
+ .listener(listener)
+ .maxAllocation(limit)
+ .roundingPolicy(roundingPolicy)
+ .build()
+ );
+ }
+
+ public RootAllocator(Config config) {
+ super(null, "ROOT", config);
+ }
+
+ /**
+ * Verify the accounting state of the allocation system.
+ */
+ @VisibleForTesting
+ public void verify() {
+ verifyAllocator();
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java
new file mode 100644
index 000000000..2699b6a46
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * Helper interface to generify a value to be included in the map where
+ * key is part of the value.
+ *
+ * @param <K> The type of the key.
+ */
+public interface ValueWithKeyIncluded<K> {
+ K getKey();
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/package-info.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/package-info.java
new file mode 100644
index 000000000..5aef955a3
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/package-info.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Memory Allocation, Account and Management
+ *
+ * See the README.md file in this directory for detailed information about Arrow's memory
+ * allocation subsystem.
+ *
+ */
+
+package org.apache.arrow.memory;
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java
new file mode 100644
index 000000000..7ba231b0c
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.rounding;
+
+import org.apache.arrow.memory.util.CommonUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The default rounding policy. That is, if the requested size is within the chunk size,
+ * the rounded size will be the next power of two. Otherwise, the rounded size
+ * will be identical to the requested size.
+ */
+public class DefaultRoundingPolicy implements RoundingPolicy {
+ private static final Logger logger = LoggerFactory.getLogger(DefaultRoundingPolicy.class);
+ public final long chunkSize;
+
+ /**
+ * The variables here and the static block calculates the DEFAULT_CHUNK_SIZE.
+ *
+ * <p>
+ * It was copied from {@link io.netty.buffer.PooledByteBufAllocator}.
+ * </p>
+ */
+ private static final int MIN_PAGE_SIZE = 4096;
+ private static final int MAX_CHUNK_SIZE = (int) (((long) Integer.MAX_VALUE + 1) / 2);
+ private static final long DEFAULT_CHUNK_SIZE;
+
+
+ static {
+ int defaultPageSize = Integer.getInteger("org.apache.memory.allocator.pageSize", 8192);
+ Throwable pageSizeFallbackCause = null;
+ try {
+ validateAndCalculatePageShifts(defaultPageSize);
+ } catch (Throwable t) {
+ pageSizeFallbackCause = t;
+ defaultPageSize = 8192;
+ }
+
+ int defaultMaxOrder = Integer.getInteger("org.apache.memory.allocator.maxOrder", 11);
+ Throwable maxOrderFallbackCause = null;
+ try {
+ validateAndCalculateChunkSize(defaultPageSize, defaultMaxOrder);
+ } catch (Throwable t) {
+ maxOrderFallbackCause = t;
+ defaultMaxOrder = 11;
+ }
+ DEFAULT_CHUNK_SIZE = validateAndCalculateChunkSize(defaultPageSize, defaultMaxOrder);
+ if (logger.isDebugEnabled()) {
+ logger.debug("-Dorg.apache.memory.allocator.pageSize: {}", defaultPageSize);
+ logger.debug("-Dorg.apache.memory.allocator.maxOrder: {}", defaultMaxOrder);
+ }
+ }
+
+ private static int validateAndCalculatePageShifts(int pageSize) {
+ if (pageSize < MIN_PAGE_SIZE) {
+ throw new IllegalArgumentException("pageSize: " + pageSize + " (expected: " + MIN_PAGE_SIZE + ")");
+ }
+
+ if ((pageSize & pageSize - 1) != 0) {
+ throw new IllegalArgumentException("pageSize: " + pageSize + " (expected: power of 2)");
+ }
+
+ // Logarithm base 2. At this point we know that pageSize is a power of two.
+ return Integer.SIZE - 1 - Integer.numberOfLeadingZeros(pageSize);
+ }
+
+ private static int validateAndCalculateChunkSize(int pageSize, int maxOrder) {
+ if (maxOrder > 14) {
+ throw new IllegalArgumentException("maxOrder: " + maxOrder + " (expected: 0-14)");
+ }
+
+ // Ensure the resulting chunkSize does not overflow.
+ int chunkSize = pageSize;
+ for (int i = maxOrder; i > 0; i --) {
+ if (chunkSize > MAX_CHUNK_SIZE / 2) {
+ throw new IllegalArgumentException(String.format(
+ "pageSize (%d) << maxOrder (%d) must not exceed %d", pageSize, maxOrder, MAX_CHUNK_SIZE));
+ }
+ chunkSize <<= 1;
+ }
+ return chunkSize;
+ }
+
+ /**
+ * The singleton instance.
+ */
+ public static final DefaultRoundingPolicy DEFAULT_ROUNDING_POLICY = new DefaultRoundingPolicy(DEFAULT_CHUNK_SIZE);
+
+ private DefaultRoundingPolicy(long chunkSize) {
+ this.chunkSize = chunkSize;
+ }
+
+ @Override
+ public long getRoundedSize(long requestSize) {
+ return requestSize < chunkSize ?
+ CommonUtil.nextPowerOfTwo(requestSize) : requestSize;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/RoundingPolicy.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/RoundingPolicy.java
new file mode 100644
index 000000000..434b9a0cd
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/RoundingPolicy.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.rounding;
+
+/**
+ * The policy for rounding the buffer size, to improve performance and avoid memory fragmentation.
+ * In particular, given a requested buffer size, the policy will determine the rounded buffer size.
+ */
+public interface RoundingPolicy {
+ long getRoundedSize(long requestSize);
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/SegmentRoundingPolicy.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/SegmentRoundingPolicy.java
new file mode 100644
index 000000000..d2bc4451d
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/SegmentRoundingPolicy.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.rounding;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * The rounding policy that each buffer size must a multiple of the segment size.
+ */
+public class SegmentRoundingPolicy implements RoundingPolicy {
+
+ /**
+ * The minimal segment size.
+ */
+ public static final long MIN_SEGMENT_SIZE = 1024L;
+
+ /**
+ * The segment size. It must be at least {@link SegmentRoundingPolicy#MIN_SEGMENT_SIZE},
+ * and be a power of 2.
+ */
+ private int segmentSize;
+
+ /**
+ * Constructor for the segment rounding policy.
+ * @param segmentSize the segment size.
+ * @throws IllegalArgumentException if the segment size is smaller than
+ * {@link SegmentRoundingPolicy#MIN_SEGMENT_SIZE}, or is not a power of 2.
+ */
+ public SegmentRoundingPolicy(int segmentSize) {
+ Preconditions.checkArgument(segmentSize >= MIN_SEGMENT_SIZE,
+ "The segment size cannot be smaller than %s", MIN_SEGMENT_SIZE);
+ Preconditions.checkArgument((segmentSize & (segmentSize - 1)) == 0,
+ "The segment size must be a power of 2");
+ this.segmentSize = segmentSize;
+ }
+
+ @Override
+ public long getRoundedSize(long requestSize) {
+ return (requestSize + (segmentSize - 1)) / segmentSize * segmentSize;
+ }
+
+ public int getSegmentSize() {
+ return segmentSize;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java
new file mode 100644
index 000000000..fa1cfbdb2
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Pointer to a memory region within an {@link ArrowBuf}.
+ * It will be used as the basis for calculating hash code within a vector, and equality determination.
+ */
+public final class ArrowBufPointer {
+
+ /**
+ * The hash code when the arrow buffer is null.
+ */
+ public static final int NULL_HASH_CODE = 0;
+
+ private ArrowBuf buf;
+
+ private long offset;
+
+ private long length;
+
+ private int hashCode = NULL_HASH_CODE;
+
+ private final ArrowBufHasher hasher;
+
+ /**
+ * A flag indicating if the underlying memory region has changed.
+ */
+ private boolean hashCodeChanged = false;
+
+ /**
+ * The default constructor.
+ */
+ public ArrowBufPointer() {
+ this(SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Constructs an arrow buffer pointer with the specified hasher.
+ * @param hasher the hasher to use.
+ */
+ public ArrowBufPointer(ArrowBufHasher hasher) {
+ Preconditions.checkNotNull(hasher);
+ this.hasher = hasher;
+ }
+
+ /**
+ * Constructs an Arrow buffer pointer.
+ * @param buf the underlying {@link ArrowBuf}, which can be null.
+ * @param offset the start off set of the memory region pointed to.
+ * @param length the length off set of the memory region pointed to.
+ */
+ public ArrowBufPointer(ArrowBuf buf, long offset, long length) {
+ this(buf, offset, length, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Constructs an Arrow buffer pointer.
+ * @param buf the underlying {@link ArrowBuf}, which can be null.
+ * @param offset the start off set of the memory region pointed to.
+ * @param length the length off set of the memory region pointed to.
+ * @param hasher the hasher used to calculate the hash code.
+ */
+ public ArrowBufPointer(ArrowBuf buf, long offset, long length, ArrowBufHasher hasher) {
+ Preconditions.checkNotNull(hasher);
+ this.hasher = hasher;
+ set(buf, offset, length);
+ }
+
+ /**
+ * Sets this pointer.
+ * @param buf the underlying {@link ArrowBuf}, which can be null.
+ * @param offset the start off set of the memory region pointed to.
+ * @param length the length off set of the memory region pointed to.
+ */
+ public void set(ArrowBuf buf, long offset, long length) {
+ this.buf = buf;
+ this.offset = offset;
+ this.length = length;
+
+ hashCodeChanged = true;
+ }
+
+ /**
+ * Gets the underlying buffer, or null if the underlying data is invalid or null.
+ * @return the underlying buffer, if any, or null if the underlying data is invalid or null.
+ */
+ public ArrowBuf getBuf() {
+ return buf;
+ }
+
+ public long getOffset() {
+ return offset;
+ }
+
+ public long getLength() {
+ return length;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ if (!hasher.equals(((ArrowBufPointer) o).hasher)) {
+ // note that the hasher is incorporated in equality determination
+ // this is to avoid problems in cases where two Arrow buffer pointers are not equal
+ // while having different hashers and equal hash codes.
+ return false;
+ }
+
+ ArrowBufPointer other = (ArrowBufPointer) o;
+ if (buf == null || other.buf == null) {
+ if (buf == null && other.buf == null) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ return ByteFunctionHelpers.equal(buf, offset, offset + length,
+ other.buf, other.offset, other.offset + other.length) != 0;
+ }
+
+ @Override
+ public int hashCode() {
+ if (!hashCodeChanged) {
+ return hashCode;
+ }
+
+ // re-compute the hash code
+ if (buf == null) {
+ hashCode = NULL_HASH_CODE;
+ } else {
+ hashCode = hasher.hashCode(buf, offset, length);
+ }
+
+ hashCodeChanged = false;
+ return hashCode;
+ }
+
+ /**
+ * Compare two arrow buffer pointers.
+ * The comparison is based on lexicographic order.
+ * @param that the other pointer to compare.
+ * @return 0 if the two pointers are equal;
+ * a positive integer if this pointer is larger;
+ * a negative integer if this pointer is smaller.
+ */
+ public int compareTo(ArrowBufPointer that) {
+ if (this.buf == null || that.buf == null) {
+ if (this.buf == null && that.buf == null) {
+ return 0;
+ } else {
+ // null is smaller
+ return this.buf == null ? -1 : 1;
+ }
+ }
+
+ return ByteFunctionHelpers.compare(this.buf, this.offset, this.offset + this.length,
+ that.buf, that.offset, that.offset + that.length);
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AssertionUtil.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AssertionUtil.java
new file mode 100644
index 000000000..5e5b331fa
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AssertionUtil.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+/**
+ * Utility class to that provides {@link #ASSERT_ENABLED} constant to determine if assertions are enabled.
+ */
+public class AssertionUtil {
+
+ public static final boolean ASSERT_ENABLED;
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AssertionUtil.class);
+
+ static {
+ boolean isAssertEnabled = false;
+ assert isAssertEnabled = true;
+ ASSERT_ENABLED = isAssertEnabled;
+ }
+
+ private AssertionUtil() {
+ }
+
+ public static boolean isAssertionsEnabled() {
+ return ASSERT_ENABLED;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AutoCloseableLock.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AutoCloseableLock.java
new file mode 100644
index 000000000..95228cf78
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AutoCloseableLock.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import java.util.concurrent.locks.Lock;
+
+/**
+ * Simple wrapper class that allows Locks to be released via a try-with-resources block.
+ */
+public class AutoCloseableLock implements AutoCloseable {
+
+ private final Lock lock;
+
+ public AutoCloseableLock(Lock lock) {
+ this.lock = lock;
+ }
+
+ public AutoCloseableLock open() {
+ lock.lock();
+ return this;
+ }
+
+ @Override
+ public void close() {
+ lock.unlock();
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java
new file mode 100644
index 000000000..9579245ca
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import java.nio.ByteOrder;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BoundsChecking;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+
+/**
+ * Utility methods for memory comparison at a byte level.
+ */
+public class ByteFunctionHelpers {
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ByteFunctionHelpers.class);
+
+ private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+
+ private ByteFunctionHelpers() {}
+
+ /**
+ * Helper function to check for equality of bytes in two ArrowBufs.
+ *
+ * @param left Left ArrowBuf for comparison
+ * @param lStart start offset in the buffer
+ * @param lEnd end offset in the buffer
+ * @param right Right ArrowBuf for comparison
+ * @param rStart start offset in the buffer
+ * @param rEnd end offset in the buffer
+ * @return 1 if equals, 0 otherwise
+ */
+ public static int equal(final ArrowBuf left, long lStart, long lEnd, final ArrowBuf right, long rStart, long rEnd) {
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ left.checkBytes(lStart, lEnd);
+ right.checkBytes(rStart, rEnd);
+ }
+ return memEqual(left.memoryAddress(), lStart, lEnd, right.memoryAddress(), rStart, rEnd);
+ }
+
+ private static int memEqual(final long laddr, long lStart, long lEnd, final long raddr, long rStart,
+ final long rEnd) {
+
+ long n = lEnd - lStart;
+ if (n == rEnd - rStart) {
+ long lPos = laddr + lStart;
+ long rPos = raddr + rStart;
+
+ while (n > 63) {
+ for (int x = 0; x < 8; x++) {
+ long leftLong = MemoryUtil.UNSAFE.getLong(lPos);
+ long rightLong = MemoryUtil.UNSAFE.getLong(rPos);
+ if (leftLong != rightLong) {
+ return 0;
+ }
+ lPos += 8;
+ rPos += 8;
+ }
+ n -= 64;
+ }
+
+ while (n > 7) {
+ long leftLong = MemoryUtil.UNSAFE.getLong(lPos);
+ long rightLong = MemoryUtil.UNSAFE.getLong(rPos);
+ if (leftLong != rightLong) {
+ return 0;
+ }
+ lPos += 8;
+ rPos += 8;
+ n -= 8;
+ }
+
+ if (n > 3) {
+ int leftInt = MemoryUtil.UNSAFE.getInt(lPos);
+ int rightInt = MemoryUtil.UNSAFE.getInt(rPos);
+ if (leftInt != rightInt) {
+ return 0;
+ }
+ lPos += 4;
+ rPos += 4;
+ n -= 4;
+ }
+
+ while (n-- != 0) {
+ byte leftByte = MemoryUtil.UNSAFE.getByte(lPos);
+ byte rightByte = MemoryUtil.UNSAFE.getByte(rPos);
+ if (leftByte != rightByte) {
+ return 0;
+ }
+ lPos++;
+ rPos++;
+ }
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+
+ /**
+ * Helper function to compare a set of bytes in two ArrowBufs.
+ *
+ * <p>Function will check data before completing in the case that
+ *
+ * @param left Left ArrowBuf to compare
+ * @param lStart start offset in the buffer
+ * @param lEnd end offset in the buffer
+ * @param right Right ArrowBuf to compare
+ * @param rStart start offset in the buffer
+ * @param rEnd end offset in the buffer
+ * @return 1 if left input is greater, -1 if left input is smaller, 0 otherwise
+ */
+ public static int compare(
+ final ArrowBuf left,
+ long lStart,
+ long lEnd,
+ final ArrowBuf right,
+ long rStart,
+ long rEnd) {
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ left.checkBytes(lStart, lEnd);
+ right.checkBytes(rStart, rEnd);
+ }
+ return memcmp(left.memoryAddress(), lStart, lEnd, right.memoryAddress(), rStart, rEnd);
+ }
+
+ private static int memcmp(
+ final long laddr,
+ long lStart,
+ long lEnd,
+ final long raddr,
+ long rStart,
+ final long rEnd) {
+ long lLen = lEnd - lStart;
+ long rLen = rEnd - rStart;
+ long n = Math.min(rLen, lLen);
+ long lPos = laddr + lStart;
+ long rPos = raddr + rStart;
+
+ while (n > 63) {
+ for (int x = 0; x < 8; x++) {
+ long leftLong = MemoryUtil.UNSAFE.getLong(lPos);
+ long rightLong = MemoryUtil.UNSAFE.getLong(rPos);
+ if (leftLong != rightLong) {
+ if (LITTLE_ENDIAN) {
+ return unsignedLongCompare(Long.reverseBytes(leftLong), Long.reverseBytes(rightLong));
+ } else {
+ return unsignedLongCompare(leftLong, rightLong);
+ }
+ }
+ lPos += 8;
+ rPos += 8;
+ }
+ n -= 64;
+ }
+
+ while (n > 7) {
+ long leftLong = MemoryUtil.UNSAFE.getLong(lPos);
+ long rightLong = MemoryUtil.UNSAFE.getLong(rPos);
+ if (leftLong != rightLong) {
+ if (LITTLE_ENDIAN) {
+ return unsignedLongCompare(Long.reverseBytes(leftLong), Long.reverseBytes(rightLong));
+ } else {
+ return unsignedLongCompare(leftLong, rightLong);
+ }
+ }
+ lPos += 8;
+ rPos += 8;
+ n -= 8;
+ }
+
+ if (n > 3) {
+ int leftInt = MemoryUtil.UNSAFE.getInt(lPos);
+ int rightInt = MemoryUtil.UNSAFE.getInt(rPos);
+ if (leftInt != rightInt) {
+ if (LITTLE_ENDIAN) {
+ return unsignedIntCompare(Integer.reverseBytes(leftInt), Integer.reverseBytes(rightInt));
+ } else {
+ return unsignedIntCompare(leftInt, rightInt);
+ }
+ }
+ lPos += 4;
+ rPos += 4;
+ n -= 4;
+ }
+
+ while (n-- != 0) {
+ byte leftByte = MemoryUtil.UNSAFE.getByte(lPos);
+ byte rightByte = MemoryUtil.UNSAFE.getByte(rPos);
+ if (leftByte != rightByte) {
+ return ((leftByte & 0xFF) - (rightByte & 0xFF)) > 0 ? 1 : -1;
+ }
+ lPos++;
+ rPos++;
+ }
+
+ if (lLen == rLen) {
+ return 0;
+ }
+
+ return lLen > rLen ? 1 : -1;
+
+ }
+
+ /**
+ * Helper function to compare a set of bytes in ArrowBuf to a ByteArray.
+ *
+ * @param left Left ArrowBuf for comparison purposes
+ * @param lStart start offset in the buffer
+ * @param lEnd end offset in the buffer
+ * @param right second input to be compared
+ * @param rStart start offset in the byte array
+ * @param rEnd end offset in the byte array
+ * @return 1 if left input is greater, -1 if left input is smaller, 0 otherwise
+ */
+ public static int compare(
+ final ArrowBuf left,
+ int lStart,
+ int lEnd,
+ final byte[] right,
+ int rStart,
+ final int rEnd) {
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ left.checkBytes(lStart, lEnd);
+ }
+ return memcmp(left.memoryAddress(), lStart, lEnd, right, rStart, rEnd);
+ }
+
+
+ /**
+ * Compares the two specified {@code long} values, treating them as unsigned values between
+ * {@code 0} and {@code 2^64 - 1} inclusive.
+ *
+ * @param a the first unsigned {@code long} to compare
+ * @param b the second unsigned {@code long} to compare
+ * @return a negative value if {@code a} is less than {@code b}; a positive value if {@code a} is
+ * greater than {@code b}; or zero if they are equal
+ */
+ public static int unsignedLongCompare(long a, long b) {
+ return Long.compare(a ^ Long.MIN_VALUE, b ^ Long.MIN_VALUE);
+ }
+
+ public static int unsignedIntCompare(int a, int b) {
+ return Integer.compare(a ^ Integer.MIN_VALUE, b ^ Integer.MIN_VALUE);
+ }
+
+ private static int memcmp(
+ final long laddr,
+ int lStart,
+ int lEnd,
+ final byte[] right,
+ int rStart,
+ final int rEnd) {
+ int lLen = lEnd - lStart;
+ int rLen = rEnd - rStart;
+ int n = Math.min(rLen, lLen);
+ long lPos = laddr + lStart;
+ int rPos = rStart;
+
+ while (n > 7) {
+ long leftLong = MemoryUtil.UNSAFE.getLong(lPos);
+ long rightLong = MemoryUtil.UNSAFE.getLong(right, MemoryUtil.BYTE_ARRAY_BASE_OFFSET + rPos);
+ if (leftLong != rightLong) {
+ if (LITTLE_ENDIAN) {
+ return unsignedLongCompare(Long.reverseBytes(leftLong), Long.reverseBytes(rightLong));
+ } else {
+ return unsignedLongCompare(leftLong, rightLong);
+ }
+ }
+ lPos += 8;
+ rPos += 8;
+ n -= 8;
+ }
+
+ if (n > 3) {
+ int leftInt = MemoryUtil.UNSAFE.getInt(lPos);
+ int rightInt = MemoryUtil.UNSAFE.getInt(right, MemoryUtil.BYTE_ARRAY_BASE_OFFSET + rPos);
+ if (leftInt != rightInt) {
+ if (LITTLE_ENDIAN) {
+ return unsignedIntCompare(Integer.reverseBytes(leftInt), Integer.reverseBytes(rightInt));
+ } else {
+ return unsignedIntCompare(leftInt, rightInt);
+ }
+ }
+ lPos += 4;
+ rPos += 4;
+ n -= 4;
+ }
+
+ while (n-- != 0) {
+ byte leftByte = MemoryUtil.UNSAFE.getByte(lPos);
+ byte rightByte = right[rPos];
+ if (leftByte != rightByte) {
+ return ((leftByte & 0xFF) - (rightByte & 0xFF)) > 0 ? 1 : -1;
+ }
+ lPos++;
+ rPos++;
+ }
+
+ if (lLen == rLen) {
+ return 0;
+ }
+
+ return lLen > rLen ? 1 : -1;
+ }
+
+ /**
+ * Compute hashCode with the given {@link ArrowBuf} and start/end index.
+ */
+ public static int hash(final ArrowBuf buf, long start, long end) {
+
+ return hash(SimpleHasher.INSTANCE, buf, start, end);
+ }
+
+ /**
+ * Compute hashCode with the given {@link ArrowBufHasher}, {@link ArrowBuf} and start/end index.
+ */
+ public static final int hash(ArrowBufHasher hasher, final ArrowBuf buf, long start, long end) {
+
+ if (hasher == null) {
+ hasher = SimpleHasher.INSTANCE;
+ }
+
+ return hasher.hashCode(buf, start, end - start);
+ }
+
+ /**
+ * Generate a new hashCode with the given current hashCode and new hashCode.
+ */
+ public static int combineHash(int currentHash, int newHash) {
+ return currentHash * 31 + newHash;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java
new file mode 100644
index 000000000..ccca7b1e0
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import java.util.Arrays;
+
+/**
+ * Utilities and static methods needed for arrow-memory.
+ */
+public final class CommonUtil {
+
+ private CommonUtil() { }
+
+ /**
+ * Rounds up the provided value to the nearest power of two.
+ *
+ * @param val An integer value.
+ * @return The closest power of two of that value.
+ */
+ public static int nextPowerOfTwo(int val) {
+ if (val == 0 || val == 1) {
+ return val + 1;
+ }
+ int highestBit = Integer.highestOneBit(val);
+ if (highestBit == val) {
+ return val;
+ } else {
+ return highestBit << 1;
+ }
+ }
+
+ /**
+ * Rounds up the provided value to the nearest power of two.
+ *
+ * @param val A long value.
+ * @return The closest power of two of that value.
+ */
+ public static long nextPowerOfTwo(long val) {
+ if (val == 0 || val == 1) {
+ return val + 1;
+ }
+ long highestBit = Long.highestOneBit(val);
+ if (highestBit == val) {
+ return val;
+ } else {
+ return highestBit << 1;
+ }
+ }
+
+ /**
+ * Specify an indentation amount when using a StringBuilder.
+ *
+ * @param sb StringBuilder to use
+ * @param indent Indentation amount
+ * @return the StringBuilder object with indentation applied
+ */
+ public static StringBuilder indent(StringBuilder sb, int indent) {
+ final char[] indentation = new char[indent * 2];
+ Arrays.fill(indentation, ' ');
+ sb.append(indentation);
+ return sb;
+ }
+}
+
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java
new file mode 100644
index 000000000..f02539a8a
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import java.util.Arrays;
+import java.util.LinkedList;
+
+import org.slf4j.Logger;
+
+/**
+ * Utility class that can be used to log activity within a class
+ * for later logging and debugging. Supports recording events and
+ * recording the stack at the time they occur.
+ */
+public class HistoricalLog {
+
+ private final LinkedList<Event> history = new LinkedList<>();
+ private final String idString; // the formatted id string
+ private final int limit; // the limit on the number of events kept
+ private Event firstEvent; // the first stack trace recorded
+
+ /**
+ * Constructor. The format string will be formatted and have its arguments
+ * substituted at the time this is called.
+ *
+ * @param idStringFormat {@link String#format} format string that can be used to identify this
+ * object in a log. Including some kind of unique identifier that can be
+ * associated with the object instance is best.
+ * @param args for the format string, or nothing if none are required
+ */
+ public HistoricalLog(final String idStringFormat, Object... args) {
+ this(Integer.MAX_VALUE, idStringFormat, args);
+ }
+
+ /**
+ * Constructor. The format string will be formatted and have its arguments
+ * substituted at the time this is called.
+ *
+ * <p>This form supports the specification of a limit that will limit the
+ * number of historical entries kept (which keeps down the amount of memory
+ * used). With the limit, the first entry made is always kept (under the
+ * assumption that this is the creation site of the object, which is usually
+ * interesting), and then up to the limit number of entries are kept after that.
+ * Each time a new entry is made, the oldest that is not the first is dropped.
+ *
+ * @param limit the maximum number of historical entries that will be kept, not including
+ * the first entry made
+ * @param idStringFormat {@link String#format} format string that can be used to identify this
+ * object in a log. Including some kind of unique identifier that can be
+ * associated with the object instance is best.
+ * @param args for the format string, or nothing if none are required
+ */
+ public HistoricalLog(final int limit, final String idStringFormat, Object... args) {
+ this.limit = limit;
+ this.idString = String.format(idStringFormat, args);
+ }
+
+ /**
+ * Record an event. Automatically captures the stack trace at the time this is
+ * called. The format string will be formatted and have its arguments substituted
+ * at the time this is called.
+ *
+ * @param noteFormat {@link String#format} format string that describes the event
+ * @param args for the format string, or nothing if none are required
+ */
+ public synchronized void recordEvent(final String noteFormat, Object... args) {
+ final String note = String.format(noteFormat, args);
+ final Event event = new Event(note);
+ if (firstEvent == null) {
+ firstEvent = event;
+ }
+ if (history.size() == limit) {
+ history.removeFirst();
+ }
+ history.add(event);
+ }
+
+ /**
+ * Write the history of this object to the given {@link StringBuilder}. The history
+ * includes the identifying string provided at construction time, and all the recorded
+ * events with their stack traces.
+ *
+ * @param sb {@link StringBuilder} to write to
+ * @param includeStackTrace whether to include the stacktrace of each event in the history
+ */
+ public void buildHistory(final StringBuilder sb, boolean includeStackTrace) {
+ buildHistory(sb, 0, includeStackTrace);
+ }
+
+ /**
+ * Build the history and write it to sb.
+ *
+ * @param sb output
+ * @param indent starting indent (usually "")
+ * @param includeStackTrace whether to include the stacktrace of each event.
+ */
+ public synchronized void buildHistory(
+ final StringBuilder sb, int indent, boolean includeStackTrace) {
+ final char[] indentation = new char[indent];
+ final char[] innerIndentation = new char[indent + 2];
+ Arrays.fill(indentation, ' ');
+ Arrays.fill(innerIndentation, ' ');
+
+ sb.append(indentation)
+ .append("event log for: ")
+ .append(idString)
+ .append('\n');
+
+ if (firstEvent != null) {
+ sb.append(innerIndentation)
+ .append(firstEvent.time)
+ .append(' ')
+ .append(firstEvent.note)
+ .append('\n');
+ if (includeStackTrace) {
+ firstEvent.stackTrace.writeToBuilder(sb, indent + 2);
+ }
+
+ for (final Event event : history) {
+ if (event == firstEvent) {
+ continue;
+ }
+ sb.append(innerIndentation)
+ .append(" ")
+ .append(event.time)
+ .append(' ')
+ .append(event.note)
+ .append('\n');
+
+ if (includeStackTrace) {
+ event.stackTrace.writeToBuilder(sb, indent + 2);
+ sb.append('\n');
+ }
+ }
+ }
+ }
+
+ /**
+ * Write the history of this object to the given {@link Logger}. The history
+ * includes the identifying string provided at construction time, and all the recorded
+ * events with their stack traces.
+ *
+ * @param logger {@link Logger} to write to
+ */
+ public void logHistory(final Logger logger) {
+ final StringBuilder sb = new StringBuilder();
+ buildHistory(sb, 0, true);
+ logger.debug(sb.toString());
+ }
+
+ private static class Event {
+
+ private final String note; // the event text
+ private final StackTrace stackTrace; // where the event occurred
+ private final long time;
+
+ public Event(final String note) {
+ this.note = note;
+ this.time = System.nanoTime();
+ stackTrace = new StackTrace();
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java
new file mode 100644
index 000000000..db63bbd14
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import org.apache.arrow.memory.BoundsChecking;
+
+/** Contains utilities for dealing with a 64-bit address base. */
+public final class LargeMemoryUtil {
+
+ private LargeMemoryUtil() {}
+
+ /**
+ * Casts length to an int, but raises an exception the value is outside
+ * the range of an int.
+ */
+ public static int checkedCastToInt(long length) {
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ return Math.toIntExact(length);
+ }
+ return (int) length;
+ }
+
+ /**
+ * Returns a min(Integer.MAX_VALUE, length).
+ */
+ public static int capAtMaxInt(long length) {
+ return (int) Math.min(length, Integer.MAX_VALUE);
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java
new file mode 100644
index 000000000..16ef39702
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+import sun.misc.Unsafe;
+
+/**
+ * Utilities for memory related operations.
+ */
+public class MemoryUtil {
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MemoryUtil.class);
+
+ private static final Constructor<?> DIRECT_BUFFER_CONSTRUCTOR;
+ /**
+ * The unsafe object from which to access the off-heap memory.
+ */
+ public static final Unsafe UNSAFE;
+
+ /**
+ * The start offset of array data relative to the start address of the array object.
+ */
+ public static final long BYTE_ARRAY_BASE_OFFSET;
+
+ /**
+ * The offset of the address field with the {@link java.nio.ByteBuffer} object.
+ */
+ static final long BYTE_BUFFER_ADDRESS_OFFSET;
+
+ /**
+ * If the native byte order is little-endian.
+ */
+ public static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+
+ static {
+ try {
+ // try to get the unsafe object
+ final Object maybeUnsafe = AccessController.doPrivileged(new PrivilegedAction<Object>() {
+ @Override
+ public Object run() {
+ try {
+ final Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
+ unsafeField.setAccessible(true);
+ return unsafeField.get(null);
+ } catch (Throwable e) {
+ return e;
+ }
+ }
+ });
+
+ if (maybeUnsafe instanceof Throwable) {
+ throw (Throwable) maybeUnsafe;
+ }
+
+ UNSAFE = (Unsafe) maybeUnsafe;
+
+ // get the offset of the data inside a byte array object
+ BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class);
+
+ // get the offset of the address field in a java.nio.Buffer object
+ Field addressField = java.nio.Buffer.class.getDeclaredField("address");
+ addressField.setAccessible(true);
+ BYTE_BUFFER_ADDRESS_OFFSET = UNSAFE.objectFieldOffset(addressField);
+
+ Constructor<?> directBufferConstructor;
+ long address = -1;
+ final ByteBuffer direct = ByteBuffer.allocateDirect(1);
+ try {
+
+ final Object maybeDirectBufferConstructor =
+ AccessController.doPrivileged(new PrivilegedAction<Object>() {
+ @Override
+ public Object run() {
+ try {
+ final Constructor<?> constructor =
+ direct.getClass().getDeclaredConstructor(long.class, int.class);
+ constructor.setAccessible(true);
+ logger.debug("Constructor for direct buffer found and made accessible");
+ return constructor;
+ } catch (NoSuchMethodException e) {
+ logger.debug("Cannot get constructor for direct buffer allocation", e);
+ return e;
+ } catch (SecurityException e) {
+ logger.debug("Cannot get constructor for direct buffer allocation", e);
+ return e;
+ }
+ }
+ });
+
+ if (maybeDirectBufferConstructor instanceof Constructor<?>) {
+ address = UNSAFE.allocateMemory(1);
+ // try to use the constructor now
+ try {
+ ((Constructor<?>) maybeDirectBufferConstructor).newInstance(address, 1);
+ directBufferConstructor = (Constructor<?>) maybeDirectBufferConstructor;
+ logger.debug("direct buffer constructor: available");
+ } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) {
+ logger.warn("unable to instantiate a direct buffer via constructor", e);
+ directBufferConstructor = null;
+ }
+ } else {
+ logger.debug(
+ "direct buffer constructor: unavailable",
+ (Throwable) maybeDirectBufferConstructor);
+ directBufferConstructor = null;
+ }
+ } finally {
+ if (address != -1) {
+ UNSAFE.freeMemory(address);
+ }
+ }
+ DIRECT_BUFFER_CONSTRUCTOR = directBufferConstructor;
+ } catch (Throwable e) {
+ throw new RuntimeException("Failed to initialize MemoryUtil.", e);
+ }
+ }
+
+ /**
+ * Given a {@link ByteBuffer}, gets the address the underlying memory space.
+ *
+ * @param buf the byte buffer.
+ * @return address of the underlying memory.
+ */
+ public static long getByteBufferAddress(ByteBuffer buf) {
+ return UNSAFE.getLong(buf, BYTE_BUFFER_ADDRESS_OFFSET);
+ }
+
+ private MemoryUtil() {
+ }
+
+ /**
+ * Create nio byte buffer.
+ */
+ public static ByteBuffer directBuffer(long address, int capacity) {
+ if (DIRECT_BUFFER_CONSTRUCTOR != null) {
+ if (capacity < 0) {
+ throw new IllegalArgumentException("Capacity is negative, has to be positive or 0");
+ }
+ try {
+ return (ByteBuffer) DIRECT_BUFFER_CONSTRUCTOR.newInstance(address, capacity);
+ } catch (Throwable cause) {
+ throw new Error(cause);
+ }
+ }
+ throw new UnsupportedOperationException(
+ "sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available");
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/StackTrace.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/StackTrace.java
new file mode 100644
index 000000000..d743af86c
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/StackTrace.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import java.util.Arrays;
+
+/**
+ * Convenient way of obtaining and manipulating stack traces for debugging.
+ */
+public class StackTrace {
+
+ private final StackTraceElement[] stackTraceElements;
+
+ /**
+ * Constructor. Captures the current stack trace.
+ */
+ public StackTrace() {
+ // skip over the first element so that we don't include this constructor call
+ final StackTraceElement[] stack = Thread.currentThread().getStackTrace();
+ stackTraceElements = Arrays.copyOfRange(stack, 1, stack.length - 1);
+ }
+
+ /**
+ * Write the stack trace to a StringBuilder.
+ *
+ * @param sb where to write it
+ * @param indent how many double spaces to indent each line
+ */
+ public void writeToBuilder(final StringBuilder sb, final int indent) {
+ // create the indentation string
+ final char[] indentation = new char[indent * 2];
+ Arrays.fill(indentation, ' ');
+
+ // write the stack trace in standard Java format
+ for (StackTraceElement ste : stackTraceElements) {
+ sb.append(indentation)
+ .append("at ")
+ .append(ste.getClassName())
+ .append('.')
+ .append(ste.getMethodName())
+ .append('(')
+ .append(ste.getFileName())
+ .append(':')
+ .append(Integer.toString(ste.getLineNumber()))
+ .append(")\n");
+ }
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder();
+ writeToBuilder(sb, 0);
+ return sb.toString();
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/ArrowBufHasher.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/ArrowBufHasher.java
new file mode 100644
index 000000000..0de8e62a4
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/ArrowBufHasher.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util.hash;
+
+import org.apache.arrow.memory.ArrowBuf;
+
+/**
+ * Utility for calculating the hash code for a consecutive memory region.
+ * This class provides the basic framework for efficiently calculating the hash code.
+ * <p>
+ * A default light-weight implementation is given in {@link SimpleHasher}.
+ * </p>
+ */
+public interface ArrowBufHasher {
+
+ /**
+ * Calculates the hash code for a memory region.
+ * @param address start address of the memory region.
+ * @param length length of the memory region.
+ * @return the hash code.
+ */
+ int hashCode(long address, long length);
+
+ /**
+ * Calculates the hash code for a memory region.
+ * @param buf the buffer for the memory region.
+ * @param offset offset within the buffer for the memory region.
+ * @param length length of the memory region.
+ * @return the hash code.
+ */
+ int hashCode(ArrowBuf buf, long offset, long length);
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java
new file mode 100644
index 000000000..ea565dfca
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util.hash;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.util.MemoryUtil;
+
+/**
+ * Implementation of the Murmur hashing algorithm.
+ * Details of the algorithm can be found in
+ * https://en.wikipedia.org/wiki/MurmurHash
+ * <p>
+ * Murmur hashing is computationally expensive, as it involves several
+ * integer multiplications. However, the produced hash codes have
+ * good quality in the sense that they are uniformly distributed in the universe.
+ * </p>
+ * <p>
+ * Therefore, this algorithm is suitable for scenarios where uniform hashing
+ * is desired (e.g. in an open addressing hash table/hash set).
+ * </p>
+ */
+public class MurmurHasher implements ArrowBufHasher {
+
+ private final int seed;
+
+ /**
+ * Creates a default Murmur hasher, with seed 0.
+ */
+ public MurmurHasher() {
+ this(0);
+ }
+
+ /**
+ * Creates a Murmur hasher.
+ * @param seed the seed for the hasher.
+ */
+ public MurmurHasher(int seed) {
+ this.seed = seed;
+ }
+
+ @Override
+ public int hashCode(long address, long length) {
+ return hashCode(address, length, seed);
+ }
+
+ @Override
+ public int hashCode(ArrowBuf buf, long offset, long length) {
+ buf.checkBytes(offset, offset + length);
+ return hashCode(buf.memoryAddress() + offset, length);
+ }
+
+ /**
+ * Calculates the hash code for a memory region.
+ * @param buf the buffer for the memory region.
+ * @param offset offset within the buffer for the memory region.
+ * @param length length of the memory region.
+ * @param seed the seed.
+ * @return the hash code.
+ */
+ public static int hashCode(ArrowBuf buf, long offset, long length, int seed) {
+ buf.checkBytes(offset, offset + length);
+ return hashCode(buf.memoryAddress() + offset, length, seed);
+ }
+
+ /**
+ * Calculates the hash code for a memory region.
+ * @param address start address of the memory region.
+ * @param length length of the memory region.
+ * @param seed the seed.
+ * @return the hash code.
+ */
+ public static int hashCode(long address, long length, int seed) {
+ int index = 0;
+ int hash = seed;
+ while (index + 4 <= length) {
+ int intValue = MemoryUtil.UNSAFE.getInt(address + index);
+ hash = combineHashCode(hash, intValue);
+ index += 4;
+ }
+
+ if (index < length) {
+ // process remaining data as a integer in little endian
+ int intValue = 0;
+ for (int i = index - 1; i >= index; i--) {
+ intValue <<= 8;
+ intValue |= (MemoryUtil.UNSAFE.getByte(address + i) & 0x000000ff);
+ index += 1;
+ }
+ hash = combineHashCode(hash, intValue);
+ }
+ return finalizeHashCode(hash, length);
+ }
+
+ /**
+ * Combine the current hash code and a new int value to calculate
+ * a new hash code.
+ * @param currentHashCode the current hash code.
+ * @param intValue the new int value.
+ * @return the new hah code.
+ */
+ public static int combineHashCode(int currentHashCode, int intValue) {
+ int c1 = 0xcc9e2d51;
+ int c2 = 0x1b873593;
+ int r1 = 15;
+ int r2 = 13;
+ int m = 5;
+ int n = 0xe6546b64;
+
+ int k = intValue;
+ k = k * c1;
+ k = rotateLeft(k, r1);
+ k = k * c2;
+
+ int hash = currentHashCode;
+ hash = hash ^ k;
+ hash = rotateLeft(hash, r2);
+ hash = hash * m + n;
+
+ return hash;
+ }
+
+ /**
+ * Finalizing the hash code.
+ * @param hashCode the current hash code.
+ * @param length the length of the memory region.
+ * @return the finalized hash code.
+ */
+ public static int finalizeHashCode(int hashCode, long length) {
+ hashCode = hashCode ^ (int) length;
+
+ hashCode = hashCode ^ (hashCode >>> 16);
+ hashCode = hashCode * 0x85ebca6b;
+ hashCode = hashCode ^ (hashCode >>> 13);
+ hashCode = hashCode * 0xc2b2ae35;
+ hashCode = hashCode ^ (hashCode >>> 16);
+
+ return hashCode;
+ }
+
+ private static int rotateLeft(int value, int count) {
+ return (value << count) | (value >>> (32 - count));
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ MurmurHasher that = (MurmurHasher) o;
+ return seed == that.seed;
+ }
+
+ @Override
+ public int hashCode() {
+ return seed;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java
new file mode 100644
index 000000000..da0ee4829
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util.hash;
+
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.util.MemoryUtil;
+
+/**
+ * A simple hasher that calculates the hash code of integers as is,
+ * and does not perform any finalization. So the computation is extremely
+ * efficient.
+ * <p>
+ * This algorithm only provides the most basic semantics for the hash code. That is,
+ * if two objects are equal, they must have equal hash code. However, the quality of the
+ * produced hash code may not be good. In other words, the generated hash codes are
+ * far from being uniformly distributed in the universe.
+ * </p>
+ * <p>
+ * Therefore, this algorithm is suitable only for scenarios where the most basic semantics
+ * of the hash code is required (e.g. in scenarios that require fast and proactive data pruning)
+ * </p>
+ * <p>
+ * An object of this class is stateless, so it can be shared between threads.
+ * </p>
+ */
+public class SimpleHasher implements ArrowBufHasher {
+
+ public static SimpleHasher INSTANCE = new SimpleHasher();
+
+ protected SimpleHasher() {
+ }
+
+ /**
+ * Calculates the hash code for a memory region.
+ * @param address start address of the memory region.
+ * @param length length of the memory region.
+ * @return the hash code.
+ */
+ public int hashCode(long address, long length) {
+ int hashValue = 0;
+ int index = 0;
+ while (index + 8 <= length) {
+ long longValue = MemoryUtil.UNSAFE.getLong(address + index);
+ int longHash = getLongHashCode(longValue);
+ hashValue = combineHashCode(hashValue, longHash);
+ index += 8;
+ }
+
+ if (index + 4 <= length) {
+ int intValue = MemoryUtil.UNSAFE.getInt(address + index);
+ int intHash = intValue;
+ hashValue = combineHashCode(hashValue, intHash);
+ index += 4;
+ }
+
+ while (index < length) {
+ byte byteValue = MemoryUtil.UNSAFE.getByte(address + index);
+ int byteHash = byteValue;
+ hashValue = combineHashCode(hashValue, byteHash);
+ index += 1;
+ }
+
+ return finalizeHashCode(hashValue);
+ }
+
+ /**
+ * Calculates the hash code for a memory region.
+ * @param buf the buffer for the memory region.
+ * @param offset offset within the buffer for the memory region.
+ * @param length length of the memory region.
+ * @return the hash code.
+ */
+ @Override
+ public int hashCode(ArrowBuf buf, long offset, long length) {
+ buf.checkBytes(offset, offset + length);
+ return hashCode(buf.memoryAddress() + offset, length);
+ }
+
+ protected int combineHashCode(int currentHashCode, int newHashCode) {
+ return currentHashCode * 37 + newHashCode;
+ }
+
+ protected int getLongHashCode(long longValue) {
+ return Long.hashCode(longValue);
+ }
+
+ protected int finalizeHashCode(int hashCode) {
+ return hashCode;
+ }
+
+ @Override
+ public int hashCode() {
+ return 123;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return obj != null && (obj instanceof SimpleHasher);
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java
new file mode 100644
index 000000000..d965f2aed
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.util;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.StreamSupport;
+
+/**
+ * Utilities for AutoCloseable classes.
+ */
+public final class AutoCloseables {
+ // Utility class. Should not be instantiated
+ private AutoCloseables() {
+ }
+
+ /**
+ * Returns a new {@link AutoCloseable} that calls {@link #close(Iterable)} on <code>autoCloseables</code>
+ * when close is called.
+ */
+ public static AutoCloseable all(final Collection<? extends AutoCloseable> autoCloseables) {
+ return new AutoCloseable() {
+ @Override
+ public void close() throws Exception {
+ AutoCloseables.close(autoCloseables);
+ }
+ };
+ }
+
+ /**
+ * Closes all autoCloseables if not null and suppresses exceptions by adding them to t.
+ * @param t the throwable to add suppressed exception to
+ * @param autoCloseables the closeables to close
+ */
+ public static void close(Throwable t, AutoCloseable... autoCloseables) {
+ close(t, Arrays.asList(autoCloseables));
+ }
+
+ /**
+ * Closes all autoCloseables if not null and suppresses exceptions by adding them to t.
+ * @param t the throwable to add suppressed exception to
+ * @param autoCloseables the closeables to close
+ */
+ public static void close(Throwable t, Iterable<? extends AutoCloseable> autoCloseables) {
+ try {
+ close(autoCloseables);
+ } catch (Exception e) {
+ t.addSuppressed(e);
+ }
+ }
+
+ /**
+ * Closes all autoCloseables if not null and suppresses subsequent exceptions if more than one.
+ * @param autoCloseables the closeables to close
+ */
+ public static void close(AutoCloseable... autoCloseables) throws Exception {
+ close(Arrays.asList(autoCloseables));
+ }
+
+ /**
+ * Closes all autoCloseables if not null and suppresses subsequent exceptions if more than one.
+ * @param ac the closeables to close
+ */
+ public static void close(Iterable<? extends AutoCloseable> ac) throws Exception {
+ // this method can be called on a single object if it implements Iterable<AutoCloseable>
+ // like for example VectorContainer make sure we handle that properly
+ if (ac == null) {
+ return;
+ } else if (ac instanceof AutoCloseable) {
+ ((AutoCloseable) ac).close();
+ return;
+ }
+
+ Exception topLevelException = null;
+ for (AutoCloseable closeable : ac) {
+ try {
+ if (closeable != null) {
+ closeable.close();
+ }
+ } catch (Exception e) {
+ if (topLevelException == null) {
+ topLevelException = e;
+ } else if (e != topLevelException) {
+ topLevelException.addSuppressed(e);
+ }
+ }
+ }
+ if (topLevelException != null) {
+ throw topLevelException;
+ }
+ }
+
+ /**
+ * Calls {@link #close(Iterable)} on the flattened list of closeables.
+ */
+ @SafeVarargs
+ public static void close(Iterable<? extends AutoCloseable>...closeables) throws Exception {
+ close(flatten(closeables));
+ }
+
+ @SafeVarargs
+ private static Iterable<AutoCloseable> flatten(Iterable<? extends AutoCloseable>... closeables) {
+ return new Iterable<AutoCloseable>() {
+ // Cast from Iterable<? extends AutoCloseable> to Iterable<AutoCloseable> is safe in this context
+ // since there's no modification of the original collection
+ @SuppressWarnings("unchecked")
+ @Override
+ public Iterator<AutoCloseable> iterator() {
+ return Arrays.stream(closeables)
+ .flatMap((Iterable<? extends AutoCloseable> i)
+ -> StreamSupport.stream(((Iterable<AutoCloseable>) i).spliterator(), /*parallel=*/false))
+ .iterator();
+ }
+ };
+ }
+
+ /**
+ * Converts <code>ac</code> to a {@link Iterable} filtering out any null values.
+ */
+ public static Iterable<AutoCloseable> iter(AutoCloseable... ac) {
+ if (ac.length == 0) {
+ return Collections.emptyList();
+ } else {
+ final List<AutoCloseable> nonNullAc = new ArrayList<>();
+ for (AutoCloseable autoCloseable : ac) {
+ if (autoCloseable != null) {
+ nonNullAc.add(autoCloseable);
+ }
+ }
+ return nonNullAc;
+ }
+ }
+
+ /**
+ * A closeable wrapper that will close the underlying closeables if a commit does not occur.
+ */
+ public static class RollbackCloseable implements AutoCloseable {
+
+ private boolean commit = false;
+ private List<AutoCloseable> closeables;
+
+ public RollbackCloseable(AutoCloseable... closeables) {
+ this.closeables = new ArrayList<>(Arrays.asList(closeables));
+ }
+
+ public <T extends AutoCloseable> T add(T t) {
+ closeables.add(t);
+ return t;
+ }
+
+ /**
+ * Add all of <code>list</code> to the rollback list.
+ */
+ public void addAll(AutoCloseable... list) {
+ closeables.addAll(Arrays.asList(list));
+ }
+
+ /**
+ * Add all of <code>list</code> to the rollback list.
+ */
+ public void addAll(Iterable<? extends AutoCloseable> list) {
+ for (AutoCloseable ac : list) {
+ closeables.add(ac);
+ }
+ }
+
+ public void commit() {
+ commit = true;
+ }
+
+ @Override
+ public void close() throws Exception {
+ if (!commit) {
+ AutoCloseables.close(closeables);
+ }
+ }
+
+ }
+
+ /**
+ * Creates an {@link RollbackCloseable} from the given closeables.
+ */
+ public static RollbackCloseable rollbackable(AutoCloseable... closeables) {
+ return new RollbackCloseable(closeables);
+ }
+
+ /**
+ * close() an {@link java.lang.AutoCloseable} without throwing a (checked)
+ * {@link java.lang.Exception}. This wraps the close() call with a
+ * try-catch that will rethrow an Exception wrapped with a
+ * {@link java.lang.RuntimeException}, providing a way to call close()
+ * without having to do the try-catch everywhere or propagate the Exception.
+ *
+ * @param autoCloseable the AutoCloseable to close; may be null
+ * @throws RuntimeException if an Exception occurs; the Exception is
+ * wrapped by the RuntimeException
+ */
+ public static void closeNoChecked(final AutoCloseable autoCloseable) {
+ if (autoCloseable != null) {
+ try {
+ autoCloseable.close();
+ } catch (final Exception e) {
+ throw new RuntimeException("Exception while closing: " + e.getMessage(), e);
+ }
+ }
+ }
+
+ private static final AutoCloseable noOpAutocloseable = new AutoCloseable() {
+ @Override
+ public void close() {
+ }
+ };
+
+ /**
+ * Get an AutoCloseable that does nothing.
+ *
+ * @return A do-nothing autocloseable
+ */
+ public static AutoCloseable noop() {
+ return noOpAutocloseable;
+ }
+}
+
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java
new file mode 100644
index 000000000..6b01a61eb
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.util;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+/**
+ * Utility methods for manipulating {@link java.util.Collections} and their subclasses/implementations.
+ */
+public final class Collections2 {
+ private Collections2() {}
+
+ /**
+ * Creates a {@link List} from the elements remaining in iterator.
+ */
+ public static <T> List<T> toList(Iterator<T> iterator) {
+ List<T> target = new ArrayList<>();
+ iterator.forEachRemaining(target::add);
+ return target;
+ }
+
+ /**
+ * Converts the iterable into a new {@link List}.
+ */
+ public static <T> List<T> toList(Iterable<T> iterable) {
+ if (iterable instanceof Collection<?>) {
+ // If iterable is a collection, take advantage of it for a more efficient copy
+ return new ArrayList<T>((Collection<T>) iterable);
+ }
+ return toList(iterable.iterator());
+ }
+
+ /**
+ * Converts the iterable into a new immutable {@link List}.
+ */
+ public static <T> List<T> toImmutableList(Iterable<T> iterable) {
+ return Collections.unmodifiableList(toList(iterable));
+ }
+
+
+ /** Copies the elements of <code>map</code> to a new unmodifiable map. */
+ public static <K, V> Map<K, V> immutableMapCopy(Map<K, V> map) {
+ return Collections.unmodifiableMap(new HashMap<>(map));
+ }
+
+ /** Copies the elements of list to a new unmodifiable list. */
+ public static <V> List<V> immutableListCopy(List<V> list) {
+ return Collections.unmodifiableList(new ArrayList<>(list));
+ }
+
+ /** Copies the values to a new unmodifiable list. */
+ public static <V> List<V> asImmutableList(V...values) {
+ return Collections.unmodifiableList(Arrays.asList(values));
+ }
+
+ /**
+ * Creates a human readable string from the remaining elements in iterator.
+ *
+ * The output should be similar to {@code Arrays#toString(Object[])}
+ */
+ public static String toString(Iterator<?> iterator) {
+ return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
+ .map(String::valueOf)
+ .collect(Collectors.joining(", ", "[", "]"));
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java
new file mode 100644
index 000000000..0ffc9447e
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java
@@ -0,0 +1,1323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Lifted from Guava 20.0 to avoid dependency for core Arrow libraries.
+ *
+ * Copyright (C) 2007 The Guava Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.arrow.util;
+
+/**
+ * Static convenience methods that help a method or constructor check whether it was invoked
+ * correctly (whether its <i>preconditions</i> have been met). These methods generally accept a
+ * {@code boolean} expression which is expected to be {@code true} (or in the case of {@code
+ * checkNotNull}, an object reference which is expected to be non-null). When {@code false} (or
+ * {@code null}) is passed instead, the {@code Preconditions} method throws an unchecked exception,
+ * which helps the calling method communicate to <i>its</i> caller that <i>that</i> caller has made
+ * a mistake. Example: <pre> {@code
+ *
+ * /**
+ * * Returns the positive square root of the given value.
+ * *
+ * * @throws IllegalArgumentException if the value is negative
+ * *}{@code /
+ * public static double sqrt(double value) {
+ * Preconditions.checkArgument(value >= 0.0, "negative value: %s", value);
+ * // calculate the square root
+ * }
+ *
+ * void exampleBadCaller() {
+ * double d = sqrt(-1.0);
+ * }}</pre>
+ *
+ * <p>In this example, {@code checkArgument} throws an {@code IllegalArgumentException} to indicate
+ * that {@code exampleBadCaller} made an error in <i>its</i> call to {@code sqrt}.
+ *
+ * <h3>Warning about performance</h3>
+ *
+ * <p>The goal of this class is to improve readability of code, but in some circumstances this may
+ * come at a significant performance cost. Remember that parameter values for message construction
+ * must all be computed eagerly, and autoboxing and varargs array creation may happen as well, even
+ * when the precondition check then succeeds (as it should almost always do in production). In some
+ * circumstances these wasted CPU cycles and allocations can add up to a real problem.
+ * Performance-sensitive precondition checks can always be converted to the customary form:
+ * <pre> {@code
+ *
+ * if (value < 0.0) {
+ * throw new IllegalArgumentException("negative value: " + value);
+ * }}</pre>
+ *
+ * <h3>Other types of preconditions</h3>
+ *
+ * <p>Not every type of precondition failure is supported by these methods. Continue to throw
+ * standard JDK exceptions such as {@link java.util.NoSuchElementException} or
+ * {@link UnsupportedOperationException} in the situations they are intended for.
+ *
+ * <h3>Non-preconditions</h3>
+ *
+ * <p>It is of course possible to use the methods of this class to check for invalid conditions
+ * which are <i>not the caller's fault</i>. Doing so is <b>not recommended</b> because it is
+ * misleading to future readers of the code and of stack traces. See
+ * <a href="https://github.com/google/guava/wiki/ConditionalFailuresExplained">Conditional failures
+ * explained</a> in the Guava User Guide for more advice.
+ *
+ * <h3>{@code java.util.Objects.requireNonNull()}</h3>
+ *
+ * <p>Projects which use {@code com.google.common} should generally avoid the use of
+ * {@link java.util.Objects#requireNonNull(Object)}. Instead, use whichever of
+ * {@link #checkNotNull(Object)} or {@link Verify#verifyNotNull(Object)} is appropriate to the
+ * situation. (The same goes for the message-accepting overloads.)
+ *
+ * <h3>Only {@code %s} is supported</h3>
+ *
+ * <p>In {@code Preconditions} error message template strings, only the {@code "%s"} specifier is
+ * supported, not the full range of {@link java.util.Formatter} specifiers.
+ *
+ * <h3>More information</h3>
+ *
+ * <p>See the Guava User Guide on
+ * <a href="https://github.com/google/guava/wiki/PreconditionsExplained">using {@code
+ * Preconditions}</a>.
+ *
+ * @author Kevin Bourrillion
+ * @since 2.0
+ */
+public final class Preconditions {
+ private Preconditions() {}
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * @param expression a boolean expression
+ * @throws IllegalArgumentException if {@code expression} is false
+ */
+ public static void checkArgument(boolean expression) {
+ if (!expression) {
+ throw new IllegalArgumentException();
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * @param expression a boolean expression
+ * @param errorMessage the exception message to use if the check fails; will be converted to a
+ * string using {@link String#valueOf(Object)}
+ * @throws IllegalArgumentException if {@code expression} is false
+ */
+ public static void checkArgument(boolean expression, Object errorMessage) {
+ if (!expression) {
+ throw new IllegalArgumentException(String.valueOf(errorMessage));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * @param expression a boolean expression
+ * @param errorMessageTemplate a template for the exception message should the check fail. The
+ * message is formed by replacing each {@code %s} placeholder in the template with an
+ * argument. These are matched by position - the first {@code %s} gets {@code
+ * errorMessageArgs[0]}, etc. Unmatched arguments will be appended to the formatted message in
+ * square braces. Unmatched placeholders will be left as-is.
+ * @param errorMessageArgs the arguments to be substituted into the message template. Arguments
+ * are converted to strings using {@link String#valueOf(Object)}.
+ * @throws IllegalArgumentException if {@code expression} is false
+ * @throws NullPointerException if the check fails and either {@code errorMessageTemplate} or
+ * {@code errorMessageArgs} is null (don't let this happen)
+ */
+ public static void checkArgument(
+ boolean expression,
+ String errorMessageTemplate,
+ Object... errorMessageArgs) {
+ if (!expression) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, errorMessageArgs));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(boolean b, String errorMessageTemplate, char p1) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(boolean b, String errorMessageTemplate, int p1) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(boolean b, String errorMessageTemplate, long p1) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, Object p1) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, char p1, char p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, char p1, int p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, char p1, long p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, char p1, Object p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, int p1, char p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, int p1, int p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, int p1, long p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, int p1, Object p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, long p1, char p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, long p1, int p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, long p1, long p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, long p1, Object p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, Object p1, char p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, Object p1, int p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, Object p1, long p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b, String errorMessageTemplate, Object p1, Object p2) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b,
+ String errorMessageTemplate,
+ Object p1,
+ Object p2,
+ Object p3) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2, p3));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving one or more parameters to the calling method.
+ *
+ * <p>See {@link #checkArgument(boolean, String, Object...)} for details.
+ */
+ public static void checkArgument(
+ boolean b,
+ String errorMessageTemplate,
+ Object p1,
+ Object p2,
+ Object p3,
+ Object p4) {
+ if (!b) {
+ throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2, p3, p4));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * @param expression a boolean expression
+ * @throws IllegalStateException if {@code expression} is false
+ */
+ public static void checkState(boolean expression) {
+ if (!expression) {
+ throw new IllegalStateException();
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * @param expression a boolean expression
+ * @param errorMessage the exception message to use if the check fails; will be converted to a
+ * string using {@link String#valueOf(Object)}
+ * @throws IllegalStateException if {@code expression} is false
+ */
+ public static void checkState(boolean expression, Object errorMessage) {
+ if (!expression) {
+ throw new IllegalStateException(String.valueOf(errorMessage));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * @param expression a boolean expression
+ * @param errorMessageTemplate a template for the exception message should the check fail. The
+ * message is formed by replacing each {@code %s} placeholder in the template with an
+ * argument. These are matched by position - the first {@code %s} gets {@code
+ * errorMessageArgs[0]}, etc. Unmatched arguments will be appended to the formatted message in
+ * square braces. Unmatched placeholders will be left as-is.
+ * @param errorMessageArgs the arguments to be substituted into the message template. Arguments
+ * are converted to strings using {@link String#valueOf(Object)}.
+ * @throws IllegalStateException if {@code expression} is false
+ * @throws NullPointerException if the check fails and either {@code errorMessageTemplate} or
+ * {@code errorMessageArgs} is null (don't let this happen)
+ */
+ public static void checkState(
+ boolean expression,
+ String errorMessageTemplate,
+ Object... errorMessageArgs) {
+ if (!expression) {
+ throw new IllegalStateException(format(errorMessageTemplate, errorMessageArgs));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(boolean b, String errorMessageTemplate, char p1) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(boolean b, String errorMessageTemplate, int p1) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(boolean b, String errorMessageTemplate, long p1) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, Object p1) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, char p1, char p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(boolean b, String errorMessageTemplate, char p1, int p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, char p1, long p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, char p1, Object p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(boolean b, String errorMessageTemplate, int p1, char p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(boolean b, String errorMessageTemplate, int p1, int p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(boolean b, String errorMessageTemplate, int p1, long p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, int p1, Object p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, long p1, char p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(boolean b, String errorMessageTemplate, long p1, int p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, long p1, long p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, long p1, Object p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, Object p1, char p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, Object p1, int p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, Object p1, long p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b, String errorMessageTemplate, Object p1, Object p2) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b,
+ String errorMessageTemplate,
+ Object p1,
+ Object p2,
+ Object p3) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2, p3));
+ }
+ }
+
+ /**
+ * Ensures the truth of an expression involving the state of the calling instance, but not
+ * involving any parameters to the calling method.
+ *
+ * <p>See {@link #checkState(boolean, String, Object...)} for details.
+ */
+ public static void checkState(
+ boolean b,
+ String errorMessageTemplate,
+ Object p1,
+ Object p2,
+ Object p3,
+ Object p4) {
+ if (!b) {
+ throw new IllegalStateException(format(errorMessageTemplate, p1, p2, p3, p4));
+ }
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * @param reference an object reference
+ * @return the non-null reference that was validated
+ * @throws NullPointerException if {@code reference} is null
+ */
+
+ public static <T> T checkNotNull(T reference) {
+ if (reference == null) {
+ throw new NullPointerException();
+ }
+ return reference;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * @param reference an object reference
+ * @param errorMessage the exception message to use if the check fails; will be converted to a
+ * string using {@link String#valueOf(Object)}
+ * @return the non-null reference that was validated
+ * @throws NullPointerException if {@code reference} is null
+ */
+
+ public static <T> T checkNotNull(T reference, Object errorMessage) {
+ if (reference == null) {
+ throw new NullPointerException(String.valueOf(errorMessage));
+ }
+ return reference;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * @param reference an object reference
+ * @param errorMessageTemplate a template for the exception message should the check fail. The
+ * message is formed by replacing each {@code %s} placeholder in the template with an
+ * argument. These are matched by position - the first {@code %s} gets {@code
+ * errorMessageArgs[0]}, etc. Unmatched arguments will be appended to the formatted message in
+ * square braces. Unmatched placeholders will be left as-is.
+ * @param errorMessageArgs the arguments to be substituted into the message template. Arguments
+ * are converted to strings using {@link String#valueOf(Object)}.
+ * @return the non-null reference that was validated
+ * @throws NullPointerException if {@code reference} is null
+ */
+
+ public static <T> T checkNotNull(
+ T reference, String errorMessageTemplate, Object... errorMessageArgs) {
+ if (reference == null) {
+ // If either of these parameters is null, the right thing happens anyway
+ throw new NullPointerException(format(errorMessageTemplate, errorMessageArgs));
+ }
+ return reference;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, char p1) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, int p1) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, long p1) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj, String errorMessageTemplate, Object p1) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, char p1, char p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, char p1, int p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, char p1, long p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj, String errorMessageTemplate, char p1, Object p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, int p1, char p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, int p1, int p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, int p1, long p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj, String errorMessageTemplate, int p1, Object p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, long p1, char p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, long p1, int p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(T obj, String errorMessageTemplate, long p1, long p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj, String errorMessageTemplate, long p1, Object p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj, String errorMessageTemplate, Object p1, char p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj, String errorMessageTemplate, Object p1, int p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj, String errorMessageTemplate, Object p1, long p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj, String errorMessageTemplate, Object p1, Object p2) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj,
+ String errorMessageTemplate,
+ Object p1,
+ Object p2,
+ Object p3) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2, p3));
+ }
+ return obj;
+ }
+
+ /**
+ * Ensures that an object reference passed as a parameter to the calling method is not null.
+ *
+ * <p>See {@link #checkNotNull(Object, String, Object...)} for details.
+ */
+
+ public static <T> T checkNotNull(
+ T obj,
+ String errorMessageTemplate,
+ Object p1,
+ Object p2,
+ Object p3,
+ Object p4) {
+ if (obj == null) {
+ throw new NullPointerException(format(errorMessageTemplate, p1, p2, p3, p4));
+ }
+ return obj;
+ }
+
+ /*
+ * All recent hotspots (as of 2009) *really* like to have the natural code
+ *
+ * if (guardExpression) {
+ * throw new BadException(messageExpression);
+ * }
+ *
+ * refactored so that messageExpression is moved to a separate String-returning method.
+ *
+ * if (guardExpression) {
+ * throw new BadException(badMsg(...));
+ * }
+ *
+ * The alternative natural refactorings into void or Exception-returning methods are much slower.
+ * This is a big deal - we're talking factors of 2-8 in microbenchmarks, not just 10-20%. (This is
+ * a hotspot optimizer bug, which should be fixed, but that's a separate, big project).
+ *
+ * The coding pattern above is heavily used in java.util, e.g. in ArrayList. There is a
+ * RangeCheckMicroBenchmark in the JDK that was used to test this.
+ *
+ * But the methods in this class want to throw different exceptions, depending on the args, so it
+ * appears that this pattern is not directly applicable. But we can use the ridiculous, devious
+ * trick of throwing an exception in the middle of the construction of another exception. Hotspot
+ * is fine with that.
+ */
+
+ /**
+ * Ensures that {@code index} specifies a valid <i>element</i> in an array, list or string of size
+ * {@code size}. An element index may range from zero, inclusive, to {@code size}, exclusive.
+ *
+ * @param index a user-supplied index identifying an element of an array, list or string
+ * @param size the size of that array, list or string
+ * @return the value of {@code index}
+ * @throws IndexOutOfBoundsException if {@code index} is negative or is not less than {@code size}
+ * @throws IllegalArgumentException if {@code size} is negative
+ */
+
+ public static int checkElementIndex(int index, int size) {
+ return checkElementIndex(index, size, "index");
+ }
+
+ /**
+ * Ensures that {@code index} specifies a valid <i>element</i> in an array, list or string of size
+ * {@code size}. An element index may range from zero, inclusive, to {@code size}, exclusive.
+ *
+ * @param index a user-supplied index identifying an element of an array, list or string
+ * @param size the size of that array, list or string
+ * @param desc the text to use to describe this index in an error message
+ * @return the value of {@code index}
+ * @throws IndexOutOfBoundsException if {@code index} is negative or is not less than {@code size}
+ * @throws IllegalArgumentException if {@code size} is negative
+ */
+
+ public static int checkElementIndex(int index, int size, String desc) {
+ // Carefully optimized for execution by hotspot (explanatory comment above)
+ if (index < 0 || index >= size) {
+ throw new IndexOutOfBoundsException(badElementIndex(index, size, desc));
+ }
+ return index;
+ }
+
+ private static String badElementIndex(int index, int size, String desc) {
+ if (index < 0) {
+ return format("%s (%s) must not be negative", desc, index);
+ } else if (size < 0) {
+ throw new IllegalArgumentException("negative size: " + size);
+ } else { // index >= size
+ return format("%s (%s) must be less than size (%s)", desc, index, size);
+ }
+ }
+
+ /**
+ * Ensures that {@code index} specifies a valid <i>position</i> in an array, list or string of
+ * size {@code size}. A position index may range from zero to {@code size}, inclusive.
+ *
+ * @param index a user-supplied index identifying a position in an array, list or string
+ * @param size the size of that array, list or string
+ * @return the value of {@code index}
+ * @throws IndexOutOfBoundsException if {@code index} is negative or is greater than {@code size}
+ * @throws IllegalArgumentException if {@code size} is negative
+ */
+
+ public static long checkPositionIndex(long index, long size) {
+ return checkPositionIndex(index, size, "index");
+ }
+
+ /**
+ * Ensures that {@code index} specifies a valid <i>position</i> in an array, list or string of
+ * size {@code size}. A position index may range from zero to {@code size}, inclusive.
+ *
+ * @param index a user-supplied index identifying a position in an array, list or string
+ * @param size the size of that array, list or string
+ * @param desc the text to use to describe this index in an error message
+ * @return the value of {@code index}
+ * @throws IndexOutOfBoundsException if {@code index} is negative or is greater than {@code size}
+ * @throws IllegalArgumentException if {@code size} is negative
+ */
+
+ public static long checkPositionIndex(long index, long size, String desc) {
+ // Carefully optimized for execution by hotspot (explanatory comment above)
+ if (index < 0 || index > size) {
+ throw new IndexOutOfBoundsException(badPositionIndex(index, size, desc));
+ }
+ return index;
+ }
+
+ private static String badPositionIndex(long index, long size, String desc) {
+ if (index < 0) {
+ return format("%s (%s) must not be negative", desc, index);
+ } else if (size < 0) {
+ throw new IllegalArgumentException("negative size: " + size);
+ } else { // index > size
+ return format("%s (%s) must not be greater than size (%s)", desc, index, size);
+ }
+ }
+
+ /**
+ * Ensures that {@code start} and {@code end} specify a valid <i>positions</i> in an array, list
+ * or string of size {@code size}, and are in order. A position index may range from zero to
+ * {@code size}, inclusive.
+ *
+ * @param start a user-supplied index identifying a starting position in an array, list or string
+ * @param end a user-supplied index identifying a ending position in an array, list or string
+ * @param size the size of that array, list or string
+ * @throws IndexOutOfBoundsException if either index is negative or is greater than {@code size},
+ * or if {@code end} is less than {@code start}
+ * @throws IllegalArgumentException if {@code size} is negative
+ */
+ public static void checkPositionIndexes(int start, int end, int size) {
+ // Carefully optimized for execution by hotspot (explanatory comment above)
+ if (start < 0 || end < start || end > size) {
+ throw new IndexOutOfBoundsException(badPositionIndexes(start, end, size));
+ }
+ }
+
+ private static String badPositionIndexes(int start, int end, int size) {
+ if (start < 0 || start > size) {
+ return badPositionIndex(start, size, "start index");
+ }
+ if (end < 0 || end > size) {
+ return badPositionIndex(end, size, "end index");
+ }
+ // end < start
+ return format("end index (%s) must not be less than start index (%s)", end, start);
+ }
+
+ /**
+ * Substitutes each {@code %s} in {@code template} with an argument. These are matched by
+ * position: the first {@code %s} gets {@code args[0]}, etc. If there are more arguments than
+ * placeholders, the unmatched arguments will be appended to the end of the formatted message in
+ * square braces.
+ *
+ * @param template a non-null string containing 0 or more {@code %s} placeholders.
+ * @param args the arguments to be substituted into the message template. Arguments are converted
+ * to strings using {@link String#valueOf(Object)}. Arguments can be null.
+ */
+ // Note that this is somewhat-improperly used from Verify.java as well.
+ static String format(String template, Object... args) {
+ template = String.valueOf(template); // null -> "null"
+
+ // start substituting the arguments into the '%s' placeholders
+ StringBuilder builder = new StringBuilder(template.length() + 16 * args.length);
+ int templateStart = 0;
+ int i = 0;
+ while (i < args.length) {
+ int placeholderStart = template.indexOf("%s", templateStart);
+ if (placeholderStart == -1) {
+ break;
+ }
+ builder.append(template, templateStart, placeholderStart);
+ builder.append(args[i++]);
+ templateStart = placeholderStart + 2;
+ }
+ builder.append(template, templateStart, template.length());
+
+ // if we run out of placeholders, append the extra args in square braces
+ if (i < args.length) {
+ builder.append(" [");
+ builder.append(args[i++]);
+ while (i < args.length) {
+ builder.append(", ");
+ builder.append(args[i++]);
+ }
+ builder.append(']');
+ }
+
+ return builder.toString();
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/VisibleForTesting.java b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/VisibleForTesting.java
new file mode 100644
index 000000000..b6ed378bb
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/main/java/org/apache/arrow/util/VisibleForTesting.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.util;
+
+/**
+ * Annotation to indicate a class member or class is visible
+ * only for the purposes of testing and otherwise should not
+ * be referenced by other classes.
+ */
+public @interface VisibleForTesting {
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java
new file mode 100644
index 000000000..bfe496532
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import org.apache.arrow.memory.util.MemoryUtil;
+
+/**
+ * The default Allocation Manager Factory for a module.
+ *
+ * This is only used by tests and contains only a simplistic allocator method.
+ *
+ */
+public class DefaultAllocationManagerFactory implements AllocationManager.Factory {
+
+ public static final AllocationManager.Factory FACTORY = new DefaultAllocationManagerFactory();
+ private static final ArrowBuf EMPTY = new ArrowBuf(ReferenceManager.NO_OP,
+ null,
+ 0,
+ MemoryUtil.UNSAFE.allocateMemory(0));
+
+ @Override
+ public AllocationManager create(BufferAllocator accountingAllocator, long size) {
+ return new AllocationManager(accountingAllocator) {
+ private final long allocatedSize = size;
+ private final long address = MemoryUtil.UNSAFE.allocateMemory(size);
+
+ @Override
+ public long getSize() {
+ return allocatedSize;
+ }
+
+ @Override
+ protected long memoryAddress() {
+ return address;
+ }
+
+ @Override
+ protected void release0() {
+ MemoryUtil.UNSAFE.freeMemory(address);
+ }
+ };
+ }
+
+ @Override
+ public ArrowBuf empty() {
+ return EMPTY;
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAccountant.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAccountant.java
new file mode 100644
index 000000000..13fa4a64d
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAccountant.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestAccountant {
+
+ @Test
+ public void basic() {
+ ensureAccurateReservations(null);
+ }
+
+ @Test
+ public void nested() {
+ final Accountant parent = new Accountant(null, "test", 0, Long.MAX_VALUE);
+ ensureAccurateReservations(parent);
+ assertEquals(0, parent.getAllocatedMemory());
+ assertEquals(parent.getLimit() - parent.getAllocatedMemory(), parent.getHeadroom());
+ }
+
+ @Test
+ public void multiThread() throws InterruptedException {
+ final Accountant parent = new Accountant(null, "test", 0, Long.MAX_VALUE);
+
+ final int numberOfThreads = 32;
+ final int loops = 100;
+ Thread[] threads = new Thread[numberOfThreads];
+
+ for (int i = 0; i < numberOfThreads; i++) {
+ Thread t = new Thread() {
+
+ @Override
+ public void run() {
+ try {
+ for (int i = 0; i < loops; i++) {
+ ensureAccurateReservations(parent);
+ }
+ } catch (Exception ex) {
+ ex.printStackTrace();
+ Assert.fail(ex.getMessage());
+ }
+ }
+
+ };
+ threads[i] = t;
+ t.start();
+ }
+
+ for (Thread thread : threads) {
+ thread.join();
+ }
+
+ assertEquals(0, parent.getAllocatedMemory());
+ assertEquals(parent.getLimit() - parent.getAllocatedMemory(), parent.getHeadroom());
+ }
+
+ private void ensureAccurateReservations(Accountant outsideParent) {
+ final Accountant parent = new Accountant(outsideParent, "test", 0, 10);
+ assertEquals(0, parent.getAllocatedMemory());
+
+ final Accountant child = new Accountant(parent, "test", 2, Long.MAX_VALUE);
+ assertEquals(2, parent.getAllocatedMemory());
+ assertEquals(10, child.getHeadroom());
+ {
+ AllocationOutcome first = child.allocateBytes(1);
+ assertEquals(AllocationOutcome.Status.SUCCESS, first.getStatus());
+ }
+
+ // child will have new allocation
+ assertEquals(1, child.getAllocatedMemory());
+
+ // root has no change since within reservation
+ assertEquals(2, parent.getAllocatedMemory());
+
+ {
+ AllocationOutcome first = child.allocateBytes(1);
+ assertEquals(AllocationOutcome.Status.SUCCESS, first.getStatus());
+ }
+
+ // child will have new allocation
+ assertEquals(2, child.getAllocatedMemory());
+
+ // root has no change since within reservation
+ assertEquals(2, parent.getAllocatedMemory());
+
+ child.releaseBytes(1);
+
+ // child will have new allocation
+ assertEquals(1, child.getAllocatedMemory());
+
+ // root has no change since within reservation
+ assertEquals(2, parent.getAllocatedMemory());
+
+ {
+ AllocationOutcome first = child.allocateBytes(2);
+ assertEquals(AllocationOutcome.Status.SUCCESS, first.getStatus());
+ }
+
+ // child will have new allocation
+ assertEquals(3, child.getAllocatedMemory());
+
+ // went beyond reservation, now in parent accountant
+ assertEquals(3, parent.getAllocatedMemory());
+
+ assertEquals(7, child.getHeadroom());
+ assertEquals(7, parent.getHeadroom());
+
+ {
+ AllocationOutcome first = child.allocateBytes(7);
+ assertEquals(AllocationOutcome.Status.SUCCESS, first.getStatus());
+ }
+
+ // child will have new allocation
+ assertEquals(10, child.getAllocatedMemory());
+
+ // went beyond reservation, now in parent accountant
+ assertEquals(10, parent.getAllocatedMemory());
+
+ child.releaseBytes(9);
+
+ assertEquals(1, child.getAllocatedMemory());
+ assertEquals(9, child.getHeadroom());
+
+ // back to reservation size
+ assertEquals(2, parent.getAllocatedMemory());
+ assertEquals(8, parent.getHeadroom());
+
+ AllocationOutcome first = child.allocateBytes(10);
+ assertEquals(AllocationOutcome.Status.FAILED_PARENT, first.getStatus());
+
+ // unchanged
+ assertEquals(1, child.getAllocatedMemory());
+ assertEquals(2, parent.getAllocatedMemory());
+
+ boolean withinLimit = child.forceAllocate(10);
+ assertEquals(false, withinLimit);
+
+ // at new limit
+ assertEquals(child.getAllocatedMemory(), 11);
+ assertEquals(parent.getAllocatedMemory(), 11);
+ assertEquals(-1, child.getHeadroom());
+ assertEquals(-1, parent.getHeadroom());
+
+ child.releaseBytes(11);
+ assertEquals(child.getAllocatedMemory(), 0);
+ assertEquals(parent.getAllocatedMemory(), 2);
+ assertEquals(10, child.getHeadroom());
+ assertEquals(8, parent.getHeadroom());
+
+ child.close();
+ parent.close();
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAllocationManager.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAllocationManager.java
new file mode 100644
index 000000000..df28424b3
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAllocationManager.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+/**
+ * Test cases for {@link AllocationManager}.
+ */
+public class TestAllocationManager {
+
+ @Test
+ public void testAllocationManagerType() {
+
+ // test unknown allocation manager type
+ System.clearProperty(DefaultAllocationManagerOption.ALLOCATION_MANAGER_TYPE_PROPERTY_NAME);
+ DefaultAllocationManagerOption.AllocationManagerType mgrType =
+ DefaultAllocationManagerOption.getDefaultAllocationManagerType();
+
+ assertEquals(DefaultAllocationManagerOption.AllocationManagerType.Unknown, mgrType);
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
new file mode 100644
index 000000000..ea74f21d8
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestArrowBuf {
+
+ private static final int MAX_ALLOCATION = 8 * 1024;
+ private static RootAllocator allocator;
+
+ @BeforeClass
+ public static void beforeClass() {
+ allocator = new RootAllocator(MAX_ALLOCATION);
+ }
+
+ /** Ensure the allocator is closed. */
+ @AfterClass
+ public static void afterClass() {
+ if (allocator != null) {
+ allocator.close();
+ }
+ }
+
+ @Test(expected = IndexOutOfBoundsException.class)
+ public void testSliceOutOfBoundsLength_RaisesIndexOutOfBoundsException() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(2)
+ ) {
+ assertEquals(2, buf.capacity());
+ buf.slice(0, 3);
+ }
+ }
+
+ @Test(expected = IndexOutOfBoundsException.class)
+ public void testSliceOutOfBoundsIndexPlusLength_RaisesIndexOutOfBoundsException() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(2)
+ ) {
+ assertEquals(2, buf.capacity());
+ buf.slice(1, 2);
+ }
+ }
+
+ @Test(expected = IndexOutOfBoundsException.class)
+ public void testSliceOutOfBoundsIndex_RaisesIndexOutOfBoundsException() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(2)
+ ) {
+ assertEquals(2, buf.capacity());
+ buf.slice(3, 0);
+ }
+ }
+
+ @Test
+ public void testSliceWithinBoundsLength_ReturnsSlice() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(2)
+ ) {
+ assertEquals(2, buf.capacity());
+ assertEquals(1, buf.slice(1, 1).capacity());
+ assertEquals(2, buf.slice(0, 2).capacity());
+ }
+ }
+
+ @Test
+ public void testSetBytesSliced() {
+ int arrLength = 64;
+ byte[] expected = new byte[arrLength];
+ for (int i = 0; i < expected.length; i++) {
+ expected[i] = (byte) i;
+ }
+ ByteBuffer data = ByteBuffer.wrap(expected);
+ try (ArrowBuf buf = allocator.buffer(expected.length)) {
+ buf.setBytes(0, data, 0, data.capacity());
+
+ byte[] actual = new byte[expected.length];
+ buf.getBytes(0, actual);
+ assertArrayEquals(expected, actual);
+ }
+ }
+
+ @Test
+ public void testSetBytesUnsliced() {
+ int arrLength = 64;
+ byte[] arr = new byte[arrLength];
+ for (int i = 0; i < arr.length; i++) {
+ arr[i] = (byte) i;
+ }
+ ByteBuffer data = ByteBuffer.wrap(arr);
+
+ int from = 10;
+ int to = arrLength;
+ byte[] expected = Arrays.copyOfRange(arr, from, to);
+ try (ArrowBuf buf = allocator.buffer(expected.length)) {
+ buf.setBytes(0, data, from, to - from);
+
+ byte[] actual = new byte[expected.length];
+ buf.getBytes(0, actual);
+ assertArrayEquals(expected, actual);
+ }
+ }
+
+ /** ARROW-9221: guard against big-endian byte buffers. */
+ @Test
+ public void testSetBytesBigEndian() {
+ final byte[] expected = new byte[64];
+ for (int i = 0; i < expected.length; i++) {
+ expected[i] = (byte) i;
+ }
+ // Only this code path is susceptible: others use unsafe or byte-by-byte copies, while this override copies longs.
+ final ByteBuffer data = ByteBuffer.wrap(expected).asReadOnlyBuffer();
+ assertFalse(data.hasArray());
+ assertFalse(data.isDirect());
+ assertEquals(ByteOrder.BIG_ENDIAN, data.order());
+ try (ArrowBuf buf = allocator.buffer(expected.length)) {
+ buf.setBytes(0, data);
+ byte[] actual = new byte[expected.length];
+ buf.getBytes(0, actual);
+ assertArrayEquals(expected, actual);
+ }
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java
new file mode 100644
index 000000000..5b86bed40
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.lang.reflect.Field;
+import java.net.URLClassLoader;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test cases for evaluating the value of {@link BoundsChecking#BOUNDS_CHECKING_ENABLED}.
+ */
+public class TestBoundaryChecking {
+
+ /**
+ * Get a copy of the current class loader.
+ * @return the newly created class loader.
+ */
+ private ClassLoader copyClassLoader() {
+ ClassLoader curClassLoader = this.getClass().getClassLoader();
+ if (curClassLoader instanceof URLClassLoader) {
+ // for Java 1.8
+ return new URLClassLoader(((URLClassLoader) curClassLoader).getURLs(), null);
+ }
+
+ // for Java 1.9 and Java 11.
+ return null;
+ }
+
+ /**
+ * Get the value of flag {@link BoundsChecking#BOUNDS_CHECKING_ENABLED}.
+ * @param classLoader the class loader from which to get the flag value.
+ * @return value of the flag.
+ */
+ private boolean getFlagValue(ClassLoader classLoader) throws Exception {
+ Class<?> clazz = classLoader.loadClass("org.apache.arrow.memory.BoundsChecking");
+ Field field = clazz.getField("BOUNDS_CHECKING_ENABLED");
+ return (Boolean) field.get(null);
+ }
+
+ /**
+ * Ensure the flag for bounds checking is enabled by default.
+ * This will protect users from JVM crashes.
+ */
+ @Test
+ public void testDefaultValue() throws Exception {
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ boolean boundsCheckingEnabled = getFlagValue(classLoader);
+ Assert.assertTrue(boundsCheckingEnabled);
+ }
+ }
+
+ /**
+ * Test setting the bounds checking flag by the old property.
+ * @throws Exception if loading class {@link BoundsChecking#BOUNDS_CHECKING_ENABLED} fails.
+ */
+ @Test
+ public void testEnableOldProperty() throws Exception {
+ String savedOldProperty = System.getProperty("drill.enable_unsafe_memory_access");
+ System.setProperty("drill.enable_unsafe_memory_access", "true");
+
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ boolean boundsCheckingEnabled = getFlagValue(classLoader);
+ Assert.assertFalse(boundsCheckingEnabled);
+ }
+
+ // restore system property
+ if (savedOldProperty != null) {
+ System.setProperty("drill.enable_unsafe_memory_access", savedOldProperty);
+ } else {
+ System.clearProperty("drill.enable_unsafe_memory_access");
+ }
+ }
+
+ /**
+ * Test setting the bounds checking flag by the new property.
+ * @throws Exception if loading class {@link BoundsChecking#BOUNDS_CHECKING_ENABLED} fails.
+ */
+ @Test
+ public void testEnableNewProperty() throws Exception {
+ String savedNewProperty = System.getProperty("arrow.enable_unsafe_memory_access");
+
+ System.setProperty("arrow.enable_unsafe_memory_access", "true");
+
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ boolean boundsCheckingEnabled = getFlagValue(classLoader);
+ Assert.assertFalse(boundsCheckingEnabled);
+ }
+
+ // restore system property
+ if (savedNewProperty != null) {
+ System.setProperty("arrow.enable_unsafe_memory_access", savedNewProperty);
+ } else {
+ System.clearProperty("arrow.enable_unsafe_memory_access");
+ }
+ }
+
+ /**
+ * Test setting the bounds checking flag by both old and new properties.
+ * In this case, the new property should take precedence.
+ * @throws Exception if loading class {@link BoundsChecking#BOUNDS_CHECKING_ENABLED} fails.
+ */
+ @Test
+ public void testEnableBothProperties() throws Exception {
+ String savedOldProperty = System.getProperty("drill.enable_unsafe_memory_access");
+ String savedNewProperty = System.getProperty("arrow.enable_unsafe_memory_access");
+
+ System.setProperty("drill.enable_unsafe_memory_access", "false");
+ System.setProperty("arrow.enable_unsafe_memory_access", "true");
+
+ // new property takes precedence.
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ boolean boundsCheckingEnabled = getFlagValue(classLoader);
+ Assert.assertFalse(boundsCheckingEnabled);
+ }
+
+ // restore system property
+ if (savedOldProperty != null) {
+ System.setProperty("drill.enable_unsafe_memory_access", savedOldProperty);
+ } else {
+ System.clearProperty("drill.enable_unsafe_memory_access");
+ }
+
+ if (savedNewProperty != null) {
+ System.setProperty("arrow.enable_unsafe_memory_access", savedNewProperty);
+ } else {
+ System.clearProperty("arrow.enable_unsafe_memory_access");
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java
new file mode 100644
index 000000000..0cabc4a05
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static junit.framework.TestCase.assertNotNull;
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import org.junit.Test;
+
+/**
+ * To test simplified implementation of IdentityHashMap.
+ */
+public class TestLowCostIdentityHashMap {
+
+ @Test
+ public void testIdentityHashMap() throws Exception {
+ LowCostIdentityHashMap<String, StringWithKey> hashMap = new LowCostIdentityHashMap<>();
+
+ StringWithKey obj1 = new StringWithKey("s1key", "s1value");
+ StringWithKey obj2 = new StringWithKey("s2key", "s2value");
+ StringWithKey obj3 = new StringWithKey("s3key", "s3value");
+ StringWithKey obj4 = new StringWithKey("s1key", "s4value");
+ StringWithKey obj5 = new StringWithKey("s5key", "s5value");
+
+ assertNull(hashMap.put(obj1));
+ assertNull(hashMap.put(obj2));
+ assertNull(hashMap.put(obj3));
+ assertEquals(obj1, hashMap.put(obj4));
+ assertNull(hashMap.put(obj5));
+
+ assertEquals(4, hashMap.size());
+
+ assertEquals(obj4, hashMap.get("s1key"));
+
+ assertNull(hashMap.remove("abc"));
+
+ assertEquals(obj3, hashMap.remove("s3key"));
+
+ assertEquals(3, hashMap.size());
+
+ assertTrue(!hashMap.isEmpty());
+
+ StringWithKey nextValue = hashMap.getNextValue();
+
+ assertNotNull(nextValue);
+
+ assertTrue((hashMap.get("s1key") == nextValue || hashMap.get("s2key") == nextValue ||
+ hashMap.get("s5key") == nextValue));
+
+ assertTrue(hashMap.containsValue(obj4));
+ assertTrue(hashMap.containsValue(obj2));
+ assertTrue(hashMap.containsValue(obj5));
+
+ assertEquals(obj4, hashMap.remove("s1key"));
+
+ nextValue = hashMap.getNextValue();
+
+ assertNotNull(nextValue);
+
+ assertTrue(hashMap.get("s2key") == nextValue || hashMap.get("s5key") == nextValue);
+
+ assertEquals(2, hashMap.size());
+
+ assertEquals(obj2, hashMap.remove("s2key"));
+ assertEquals(obj5, hashMap.remove("s5key"));
+
+ assertEquals(0, hashMap.size());
+
+ assertTrue(hashMap.isEmpty());
+ }
+
+ @Test
+ public void testLargeMap() throws Exception {
+ LowCostIdentityHashMap<String, StringWithKey> hashMap = new LowCostIdentityHashMap<>();
+
+ String [] keys = new String[200];
+ for (int i = 0; i < 200; i++) {
+ keys[i] = "s" + i + "key";
+ }
+
+ for (int i = 0; i < 100; i++) {
+ if (i % 5 == 0 && i != 0) {
+ StringWithKey obj = new StringWithKey(keys[i - 5], "s" + i + "value");
+ StringWithKey retObj = hashMap.put(obj);
+ assertNotNull(retObj);
+ StringWithKey obj1 = new StringWithKey(keys[i], "s" + 2 * i + "value");
+ StringWithKey retObj1 = hashMap.put(obj1);
+ assertNull(retObj1);
+ } else {
+ StringWithKey obj = new StringWithKey(keys[i], "s" + i + "value");
+ StringWithKey retObj = hashMap.put(obj);
+ assertNull(retObj);
+ }
+ }
+ assertEquals(100, hashMap.size());
+ for (int i = 0; i < 100; i++) {
+ StringWithKey returnObj = hashMap.get(keys[i]);
+ assertNotNull(returnObj);
+ if (i == 95) {
+ assertEquals("s190value", returnObj.getValue());
+ continue;
+ }
+ if (i % 5 == 0) {
+ assertEquals("s" + (i + 5) + "value", returnObj.getValue());
+ } else {
+ assertEquals("s" + i + "value", returnObj.getValue());
+ }
+ }
+
+ for (int i = 0; i < 100; i++) {
+ if (i % 4 == 0) {
+ StringWithKey returnObj = hashMap.remove(keys[i]);
+ assertNotNull(returnObj);
+ assertTrue(!hashMap.containsKey(keys[i]));
+ }
+ StringWithKey obj = new StringWithKey(keys[100 + i], "s" + (100 + i) + "value");
+ StringWithKey retObj = hashMap.put(obj);
+ assertNull(retObj);
+ assertTrue(hashMap.containsKey(keys[100 + i]));
+ }
+ assertEquals(175, hashMap.size());
+ for (int i = 0; i < 100; i++) {
+ StringWithKey retObj = hashMap.getNextValue();
+ assertNotNull(retObj);
+ hashMap.remove(retObj.getKey());
+ }
+ assertTrue(!hashMap.isEmpty());
+ assertEquals(75, hashMap.size());
+ hashMap.clear();
+ assertTrue(hashMap.isEmpty());
+ }
+
+ private class StringWithKey implements ValueWithKeyIncluded<String> {
+
+ private String myValue;
+ private String myKey;
+
+ StringWithKey(String myKey, String myValue) {
+ this.myKey = myKey;
+ this.myValue = myValue;
+ }
+
+ @Override
+ public String getKey() {
+ return myKey;
+ }
+
+ String getValue() {
+ return myValue;
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java
new file mode 100644
index 000000000..c3ed0d057
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link ArrowBufPointer}.
+ */
+public class TestArrowBufPointer {
+
+ private final int BUFFER_LENGTH = 1024;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testArrowBufPointersEqual() {
+ try (ArrowBuf buf1 = allocator.buffer(BUFFER_LENGTH);
+ ArrowBuf buf2 = allocator.buffer(BUFFER_LENGTH)) {
+ for (int i = 0; i < BUFFER_LENGTH / 4; i++) {
+ buf1.setInt(i * 4, i * 1234);
+ buf2.setInt(i * 4, i * 1234);
+ }
+
+ ArrowBufPointer ptr1 = new ArrowBufPointer(null, 0, 100);
+ ArrowBufPointer ptr2 = new ArrowBufPointer(null, 100, 5032);
+ assertTrue(ptr1.equals(ptr2));
+ for (int i = 0; i < BUFFER_LENGTH / 4; i++) {
+ ptr1.set(buf1, i * 4, 4);
+ ptr2.set(buf2, i * 4, 4);
+ assertTrue(ptr1.equals(ptr2));
+ }
+ }
+ }
+
+ @Test
+ public void testArrowBufPointersHashCode() {
+ final int vectorLength = 100;
+ try (ArrowBuf buf1 = allocator.buffer(vectorLength * 4);
+ ArrowBuf buf2 = allocator.buffer(vectorLength * 4)) {
+ for (int i = 0; i < vectorLength; i++) {
+ buf1.setInt(i * 4, i);
+ buf2.setInt(i * 4, i);
+ }
+
+ CounterHasher hasher1 = new CounterHasher();
+ CounterHasher hasher2 = new CounterHasher();
+
+ ArrowBufPointer pointer1 = new ArrowBufPointer(hasher1);
+ assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer1.hashCode());
+
+ ArrowBufPointer pointer2 = new ArrowBufPointer(hasher2);
+ assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer2.hashCode());
+
+ for (int i = 0; i < vectorLength; i++) {
+ pointer1.set(buf1, i * 4, 4);
+ pointer2.set(buf2, i * 4, 4);
+
+ assertEquals(pointer1.hashCode(), pointer2.hashCode());
+
+ // verify that the hash codes have been re-computed
+ assertEquals(hasher1.counter, i + 1);
+ assertEquals(hasher2.counter, i + 1);
+ }
+ }
+ }
+
+ @Test
+ public void testNullPointersHashCode() {
+ ArrowBufPointer pointer = new ArrowBufPointer();
+ assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer.hashCode());
+
+ pointer.set(null, 0, 0);
+ assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer.hashCode());
+ }
+
+ @Test
+ public void testReuseHashCode() {
+ try (ArrowBuf buf = allocator.buffer(10)) {
+ buf.setInt(0, 10);
+ buf.setInt(4, 20);
+
+ CounterHasher hasher = new CounterHasher();
+ ArrowBufPointer pointer = new ArrowBufPointer(hasher);
+
+ pointer.set(buf, 0, 4);
+ pointer.hashCode();
+
+ // hash code computed
+ assertEquals(1, hasher.counter);
+
+ // no hash code re-compute
+ pointer.hashCode();
+ assertEquals(1, hasher.counter);
+
+ // hash code re-computed
+ pointer.set(buf, 4, 4);
+ pointer.hashCode();
+ assertEquals(2, hasher.counter);
+ }
+ }
+
+ @Test
+ public void testHashersForEquality() {
+ try (ArrowBuf buf = allocator.buffer(10)) {
+ // pointer 1 uses the default hasher
+ ArrowBufPointer pointer1 = new ArrowBufPointer(buf, 0, 10);
+
+ // pointer 2 uses the counter hasher
+ ArrowBufPointer pointer2 = new ArrowBufPointer(buf, 0, 10, new CounterHasher());
+
+ // the two pointers cannot be equal, since they have different hashers
+ assertFalse(pointer1.equals(pointer2));
+ }
+ }
+
+ @Test
+ public void testArrowBufPointersComparison() {
+ final int vectorLength = 100;
+ try (ArrowBuf buf1 = allocator.buffer(vectorLength);
+ ArrowBuf buf2 = allocator.buffer(vectorLength)) {
+ for (int i = 0; i < vectorLength; i++) {
+ buf1.setByte(i, i);
+ buf2.setByte(i, i);
+ }
+
+ ArrowBufPointer pointer1 = new ArrowBufPointer();
+ ArrowBufPointer pointer2 = new ArrowBufPointer();
+
+ pointer1.set(buf1, 0, 10);
+ pointer2.set(buf2, 0, 10);
+ assertEquals(0, pointer1.compareTo(pointer2));
+
+ pointer1.set(null, 0, 0);
+ pointer2.set(null, 0, 0);
+ assertEquals(0, pointer1.compareTo(pointer2));
+
+ pointer2.set(buf2, 0, 5);
+ assertTrue(pointer1.compareTo(pointer2) < 0);
+
+ pointer1.set(buf1, 0, 10);
+ assertTrue(pointer1.compareTo(pointer2) > 0);
+
+ pointer1.set(buf1, 1, 5);
+ pointer2.set(buf2, 3, 8);
+ assertTrue(pointer1.compareTo(pointer2) < 0);
+ }
+ }
+
+ /**
+ * Hasher with a counter that increments each time a hash code is calculated.
+ * This is to validate that the hash code in {@link ArrowBufPointer} is reused.
+ */
+ class CounterHasher implements ArrowBufHasher {
+
+ protected int counter = 0;
+
+ @Override
+ public int hashCode(long address, long length) {
+ counter += 1;
+ return SimpleHasher.INSTANCE.hashCode(address, length);
+ }
+
+ @Override
+ public int hashCode(ArrowBuf buf, long offset, long length) {
+ counter += 1;
+ return SimpleHasher.INSTANCE.hashCode(buf, offset, length);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return o != null && this.getClass() == o.getClass();
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java
new file mode 100644
index 000000000..04a715962
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestByteFunctionHelpers {
+
+ private BufferAllocator allocator;
+
+ private static final int SIZE = 100;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testEquals() {
+ ArrowBuf buffer1 = allocator.buffer(SIZE);
+ ArrowBuf buffer2 = allocator.buffer(SIZE);
+
+ for (int i = 0; i < SIZE; i++) {
+ buffer1.setByte(i, i);
+ buffer2.setByte(i, i);
+ }
+
+ //test three cases, length>8, length>3, length<3
+
+ assertEquals(1, ByteFunctionHelpers.equal(buffer1, 0, SIZE - 1,
+ buffer2, 0, SIZE - 1));
+ assertEquals(1, ByteFunctionHelpers.equal(buffer1, 0, 6,
+ buffer2, 0, 6));
+ assertEquals(1, ByteFunctionHelpers.equal(buffer1, 0, 2,
+ buffer2, 0, 2));
+
+ //change value at index1
+ buffer1.setByte(1, 10);
+
+ assertEquals(0, ByteFunctionHelpers.equal(buffer1, 0, SIZE - 1,
+ buffer2, 0, SIZE - 1));
+ assertEquals(0, ByteFunctionHelpers.equal(buffer1, 0, 6,
+ buffer2, 0, 6));
+ assertEquals(0, ByteFunctionHelpers.equal(buffer1, 0, 2,
+ buffer2, 0, 2));
+
+ buffer1.close();
+ buffer2.close();
+
+ }
+
+ @Test
+ public void testCompare() {
+ ArrowBuf buffer1 = allocator.buffer(SIZE);
+ ArrowBuf buffer2 = allocator.buffer(SIZE);
+
+ for (int i = 0; i < SIZE; i++) {
+ buffer1.setByte(i, i);
+ buffer2.setByte(i, i);
+ }
+
+ //test three cases, length>8, length>3, length<3
+
+ assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1,
+ buffer2, 0, SIZE - 1));
+ assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 6,
+ buffer2, 0, 6));
+ assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 2,
+ buffer2, 0, 2));
+
+ //change value at index 1
+ buffer1.setByte(1, 0);
+
+ assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1,
+ buffer2, 0, SIZE - 1));
+ assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 6,
+ buffer2, 0, 6));
+ assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 2,
+ buffer2, 0, 2));
+
+ buffer1.close();
+ buffer2.close();
+
+ }
+
+ @Test
+ public void testStringCompare() {
+ String[] leftStrings = {"cat", "cats", "catworld", "dogs", "bags"};
+ String[] rightStrings = {"dog", "dogs", "dogworld", "dog", "sgab"};
+
+ for (int i = 0; i < leftStrings.length; ++i) {
+ String leftStr = leftStrings[i];
+ String rightStr = rightStrings[i];
+
+ ArrowBuf left = allocator.buffer(SIZE);
+ left.setBytes(0, leftStr.getBytes());
+ ArrowBuf right = allocator.buffer(SIZE);
+ right.setBytes(0, rightStr.getBytes());
+
+ assertEquals(leftStr.compareTo(rightStr) < 0 ? -1 : 1,
+ ByteFunctionHelpers.compare(left, 0, leftStr.length(), right, 0, rightStr.length()));
+
+ left.close();
+ right.close();
+ }
+ }
+
+ @Test
+ public void testCompareWithByteArray() {
+ ArrowBuf buffer1 = allocator.buffer(SIZE);
+ byte[] buffer2 = new byte[SIZE];
+
+ for (int i = 0; i < SIZE; i++) {
+ buffer1.setByte(i, i);
+ buffer2[i] = (byte) i;
+ }
+
+ //test three cases, length>8, length>3, length<3
+
+ assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1,
+ buffer2, 0, SIZE - 1));
+ assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 6,
+ buffer2, 0, 6));
+ assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 2,
+ buffer2, 0, 2));
+
+ //change value at index 1
+ buffer1.setByte(1, 0);
+
+ assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1,
+ buffer2, 0, SIZE - 1));
+ assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 6,
+ buffer2, 0, 6));
+ assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 2,
+ buffer2, 0, 2));
+
+ buffer1.close();
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestLargeMemoryUtil.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestLargeMemoryUtil.java
new file mode 100755
index 000000000..952fcb5f0
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestLargeMemoryUtil.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.net.URLClassLoader;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+
+public class TestLargeMemoryUtil {
+
+ /**
+ * Get a copy of the current class loader.
+ * @return the newly created class loader.
+ */
+ private ClassLoader copyClassLoader() {
+ ClassLoader curClassLoader = this.getClass().getClassLoader();
+ if (curClassLoader instanceof URLClassLoader) {
+ // for Java 1.8
+ return new URLClassLoader(((URLClassLoader) curClassLoader).getURLs(), null);
+ }
+
+ // for Java 1.9 and Java 11.
+ return null;
+ }
+
+ /**
+ * Use the checkedCastToInt method from the current classloader.
+ * @param classLoader the class loader from which to call the method.
+ * @return the return value of the method.
+ */
+ private int checkedCastToInt(ClassLoader classLoader, long value) throws Exception {
+ Class<?> clazz = classLoader.loadClass("org.apache.arrow.memory.util.LargeMemoryUtil");
+ Method method = clazz.getMethod("checkedCastToInt", long.class);
+ return (int) method.invoke(null, value);
+ }
+
+ private void checkExpectedOverflow(ClassLoader classLoader, long value) {
+ InvocationTargetException ex = Assertions.assertThrows(InvocationTargetException.class, () -> {
+ checkedCastToInt(classLoader, value);
+ });
+ Assert.assertTrue(ex.getCause() instanceof ArithmeticException);
+ Assert.assertEquals("integer overflow", ex.getCause().getMessage());
+ }
+
+ @Test
+ public void testEnableLargeMemoryUtilCheck() throws Exception {
+ String savedNewProperty = System.getProperty("arrow.enable_unsafe_memory_access");
+ System.setProperty("arrow.enable_unsafe_memory_access", "false");
+ try {
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ Assert.assertEquals(Integer.MAX_VALUE, checkedCastToInt(classLoader, Integer.MAX_VALUE));
+ checkExpectedOverflow(classLoader, Integer.MAX_VALUE + 1L);
+ checkExpectedOverflow(classLoader, Integer.MIN_VALUE - 1L);
+ }
+ } finally {
+ // restore system property
+ if (savedNewProperty != null) {
+ System.setProperty("arrow.enable_unsafe_memory_access", savedNewProperty);
+ } else {
+ System.clearProperty("arrow.enable_unsafe_memory_access");
+ }
+ }
+ }
+
+ @Test
+ public void testDisabledLargeMemoryUtilCheck() throws Exception {
+ String savedNewProperty = System.getProperty("arrow.enable_unsafe_memory_access");
+ System.setProperty("arrow.enable_unsafe_memory_access", "true");
+ try {
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ Assert.assertEquals(Integer.MAX_VALUE, checkedCastToInt(classLoader, Integer.MAX_VALUE));
+ Assert.assertEquals(Integer.MIN_VALUE, checkedCastToInt(classLoader, Integer.MAX_VALUE + 1L));
+ Assert.assertEquals(Integer.MAX_VALUE, checkedCastToInt(classLoader, Integer.MIN_VALUE - 1L));
+ }
+ } finally {
+ // restore system property
+ if (savedNewProperty != null) {
+ System.setProperty("arrow.enable_unsafe_memory_access", savedNewProperty);
+ } else {
+ System.clearProperty("arrow.enable_unsafe_memory_access");
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java
new file mode 100644
index 000000000..a8707e6ca
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util.hash;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test cases for {@link ArrowBufHasher} and its subclasses.
+ */
+@RunWith(Parameterized.class)
+public class TestArrowBufHasher {
+
+ private final int BUFFER_LENGTH = 1024;
+
+ private BufferAllocator allocator;
+
+ private ArrowBufHasher hasher;
+
+ public TestArrowBufHasher(String name, ArrowBufHasher hasher) {
+ this.hasher = hasher;
+ }
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testHasher() {
+ try (ArrowBuf buf1 = allocator.buffer(BUFFER_LENGTH);
+ ArrowBuf buf2 = allocator.buffer(BUFFER_LENGTH)) {
+ // prepare data
+ for (int i = 0; i < BUFFER_LENGTH / 4; i++) {
+ buf1.setFloat(i * 4, i / 10.0f);
+ buf2.setFloat(i * 4, i / 10.0f);
+ }
+
+ verifyHashCodesEqual(buf1, 0, 100, buf2, 0, 100);
+ verifyHashCodesEqual(buf1, 1, 5, buf2, 1, 5);
+ verifyHashCodesEqual(buf1, 10, 17, buf2, 10, 17);
+ verifyHashCodesEqual(buf1, 33, 25, buf2, 33, 25);
+ verifyHashCodesEqual(buf1, 22, 22, buf2, 22, 22);
+ verifyHashCodesEqual(buf1, 123, 333, buf2, 123, 333);
+ verifyHashCodesEqual(buf1, 374, 1, buf2, 374, 1);
+ verifyHashCodesEqual(buf1, 11, 0, buf2, 11, 0);
+ verifyHashCodesEqual(buf1, 75, 25, buf2, 75, 25);
+ verifyHashCodesEqual(buf1, 0, 1024, buf2, 0, 1024);
+ }
+ }
+
+ private void verifyHashCodesEqual(ArrowBuf buf1, int offset1, int length1,
+ ArrowBuf buf2, int offset2, int length2) {
+ int hashCode1 = hasher.hashCode(buf1, offset1, length1);
+ int hashCode2 = hasher.hashCode(buf2, offset2, length2);
+ assertEquals(hashCode1, hashCode2);
+ }
+
+ @Test
+ public void testHasherNegative() {
+ try (ArrowBuf buf = allocator.buffer(BUFFER_LENGTH)) {
+ // prepare data
+ for (int i = 0; i < BUFFER_LENGTH / 4; i++) {
+ buf.setFloat(i * 4, i / 10.0f);
+ }
+
+ assertThrows(IllegalArgumentException.class, () -> {
+ hasher.hashCode(buf, 0, -1);
+ });
+
+ assertThrows(IndexOutOfBoundsException.class, () -> {
+ hasher.hashCode(buf, 0, 1028);
+ });
+
+ assertThrows(IndexOutOfBoundsException.class, () -> {
+ hasher.hashCode(buf, 500, 1000);
+ });
+ }
+ }
+
+ @Parameterized.Parameters(name = "hasher = {0}")
+ public static Collection<Object[]> getHasher() {
+ return Arrays.asList(
+ new Object[] {SimpleHasher.class.getSimpleName(),
+ SimpleHasher.INSTANCE},
+ new Object[] {MurmurHasher.class.getSimpleName(),
+ new MurmurHasher()
+ }
+ );
+ }
+}
diff --git a/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java
new file mode 100644
index 000000000..c858ebe62
--- /dev/null
+++ b/src/arrow/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.util;
+
+import static org.junit.Assert.fail;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.junit.Test;
+
+/**
+ * Tests for {@code Collections2} class.
+ */
+public class TestCollections2 {
+
+
+ @Test
+ public void testToImmutableListFromIterable() {
+ final List<String> source = new ArrayList<>(Arrays.asList("foo", "bar", "baz"));
+
+ final List<String> copy = Collections2.toImmutableList(source);
+ assertEquals(source, copy);
+
+ try {
+ copy.add("unexpected");
+ fail("add operation should not be supported");
+ } catch (UnsupportedOperationException ignored) {
+ }
+
+ try {
+ copy.set(0, "unexpected");
+ fail("set operation should not be supported");
+ } catch (UnsupportedOperationException ignored) {
+ }
+
+ try {
+ copy.remove(0);
+ fail("remove operation should not be supported");
+ } catch (UnsupportedOperationException ignored) {
+ }
+
+ source.set(1, "newvalue");
+ source.add("anothervalue");
+
+ assertEquals("bar", copy.get(1));
+ assertEquals(3, copy.size());
+ }
+
+
+ @Test
+ public void testStringFromEmptyIterator() {
+ assertEquals("[]", Collections2.toString(Collections.emptyIterator()));
+ }
+
+ @Test
+ public void testStringFromIterator() {
+ Iterator<String> iterator = Arrays.asList("foo", "bar", "baz").iterator();
+ iterator.next();
+
+ assertEquals("[bar, baz]", Collections2.toString(iterator));
+ assertEquals(false, iterator.hasNext());
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/pom.xml b/src/arrow/java/memory/memory-netty/pom.xml
new file mode 100644
index 000000000..dee06a321
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/pom.xml
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>arrow-memory</artifactId>
+ <groupId>org.apache.arrow</groupId>
+ <version>6.0.1</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>arrow-memory-netty</artifactId>
+ <name>Arrow Memory - Netty</name>
+ <description>Netty allocator and utils for allocating memory in Arrow</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-buffer</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.immutables</groupId>
+ <artifactId>value</artifactId>
+ </dependency>
+ </dependencies>
+
+ <profiles>
+ <profile>
+ <!-- This profile turns on integration testing. It activates the failsafe plugin and will run any tests
+ with the 'IT' prefix. This should be run in a separate CI build or on developers machines as it potentially
+ uses quite a bit of memory. Activate the tests by adding -Pintegration-tests to your maven command line -->
+ <id>integration-tests</id>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <executions>
+ <execution>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+</project>
diff --git a/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/ExpandableByteBuf.java b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/ExpandableByteBuf.java
new file mode 100644
index 000000000..09b730044
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/ExpandableByteBuf.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.netty.buffer;
+
+import org.apache.arrow.memory.BufferAllocator;
+
+/**
+ * Allows us to decorate ArrowBuf to make it expandable so that we can use them in the context of
+ * the Netty framework
+ * (thus supporting RPC level memory accounting).
+ */
+public class ExpandableByteBuf extends MutableWrappedByteBuf {
+
+ private final BufferAllocator allocator;
+
+ public ExpandableByteBuf(ByteBuf buffer, BufferAllocator allocator) {
+ super(buffer);
+ this.allocator = allocator;
+ }
+
+ @Override
+ public ByteBuf copy(int index, int length) {
+ return new ExpandableByteBuf(buffer.copy(index, length), allocator);
+ }
+
+ @Override
+ public ByteBuf capacity(int newCapacity) {
+ if (newCapacity > capacity()) {
+ ByteBuf newBuf = NettyArrowBuf.unwrapBuffer(allocator.buffer(newCapacity));
+ newBuf.writeBytes(buffer, 0, buffer.capacity());
+ newBuf.readerIndex(buffer.readerIndex());
+ newBuf.writerIndex(buffer.writerIndex());
+ buffer.release();
+ buffer = newBuf;
+ return newBuf;
+ } else {
+ return super.capacity(newCapacity);
+ }
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/LargeBuffer.java b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/LargeBuffer.java
new file mode 100644
index 000000000..792b3b814
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/LargeBuffer.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.netty.buffer;
+
+/**
+ * A MutableWrappedByteBuf that also maintains a metric of the number of huge buffer bytes and
+ * counts.
+ */
+public class LargeBuffer extends MutableWrappedByteBuf {
+
+ public LargeBuffer(ByteBuf buffer) {
+ super(buffer);
+ }
+
+ @Override
+ public ByteBuf copy(int index, int length) {
+ return new LargeBuffer(buffer.copy(index, length));
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java
new file mode 100644
index 000000000..5221dd3c1
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java
@@ -0,0 +1,448 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.netty.buffer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.channels.GatheringByteChannel;
+import java.nio.channels.ScatteringByteChannel;
+
+import io.netty.util.ByteProcessor;
+
+/**
+ * This is basically a complete copy of netty's DuplicatedByteBuf. We copy because we want to override
+ * some behaviors and make buffer mutable.
+ */
+abstract class MutableWrappedByteBuf extends AbstractByteBuf {
+
+ ByteBuf buffer;
+
+ public MutableWrappedByteBuf(ByteBuf buffer) {
+ super(buffer.maxCapacity());
+
+ if (buffer instanceof MutableWrappedByteBuf) {
+ this.buffer = ((MutableWrappedByteBuf) buffer).buffer;
+ } else {
+ this.buffer = buffer;
+ }
+
+ setIndex(buffer.readerIndex(), buffer.writerIndex());
+ }
+
+ @Override
+ public ByteBuffer nioBuffer(int index, int length) {
+ return unwrap().nioBuffer(index, length);
+ }
+
+ @Override
+ public ByteBuf unwrap() {
+ return buffer;
+ }
+
+ @Override
+ public ByteBufAllocator alloc() {
+ return buffer.alloc();
+ }
+
+ @Override
+ public ByteOrder order() {
+ return buffer.order();
+ }
+
+ @Override
+ public boolean isDirect() {
+ return buffer.isDirect();
+ }
+
+ @Override
+ public int capacity() {
+ return buffer.capacity();
+ }
+
+ @Override
+ public ByteBuf capacity(int newCapacity) {
+ buffer.capacity(newCapacity);
+ return this;
+ }
+
+ @Override
+ public boolean hasArray() {
+ return buffer.hasArray();
+ }
+
+ @Override
+ public byte[] array() {
+ return buffer.array();
+ }
+
+ @Override
+ public int arrayOffset() {
+ return buffer.arrayOffset();
+ }
+
+ @Override
+ public boolean hasMemoryAddress() {
+ return buffer.hasMemoryAddress();
+ }
+
+ @Override
+ public long memoryAddress() {
+ return buffer.memoryAddress();
+ }
+
+ @Override
+ public byte getByte(int index) {
+ return _getByte(index);
+ }
+
+ @Override
+ protected byte _getByte(int index) {
+ return buffer.getByte(index);
+ }
+
+ @Override
+ public short getShort(int index) {
+ return _getShort(index);
+ }
+
+ @Override
+ protected short _getShort(int index) {
+ return buffer.getShort(index);
+ }
+
+ @Override
+ public short getShortLE(int index) {
+ return buffer.getShortLE(index);
+ }
+
+ @Override
+ protected short _getShortLE(int index) {
+ return buffer.getShortLE(index);
+ }
+
+ @Override
+ public int getUnsignedMedium(int index) {
+ return _getUnsignedMedium(index);
+ }
+
+ @Override
+ protected int _getUnsignedMedium(int index) {
+ return buffer.getUnsignedMedium(index);
+ }
+
+ @Override
+ public int getUnsignedMediumLE(int index) {
+ return buffer.getUnsignedMediumLE(index);
+ }
+
+ @Override
+ protected int _getUnsignedMediumLE(int index) {
+ return buffer.getUnsignedMediumLE(index);
+ }
+
+ @Override
+ public int getInt(int index) {
+ return _getInt(index);
+ }
+
+ @Override
+ protected int _getInt(int index) {
+ return buffer.getInt(index);
+ }
+
+ @Override
+ public int getIntLE(int index) {
+ return buffer.getIntLE(index);
+ }
+
+ @Override
+ protected int _getIntLE(int index) {
+ return buffer.getIntLE(index);
+ }
+
+ @Override
+ public long getLong(int index) {
+ return _getLong(index);
+ }
+
+ @Override
+ protected long _getLong(int index) {
+ return buffer.getLong(index);
+ }
+
+ @Override
+ public long getLongLE(int index) {
+ return buffer.getLongLE(index);
+ }
+
+ @Override
+ protected long _getLongLE(int index) {
+ return buffer.getLongLE(index);
+ }
+
+ @Override
+ public abstract ByteBuf copy(int index, int length);
+
+ @Override
+ public ByteBuf slice(int index, int length) {
+ return new SlicedByteBuf(this, index, length);
+ }
+
+ @Override
+ public ByteBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) {
+ buffer.getBytes(index, dst, dstIndex, length);
+ return this;
+ }
+
+ @Override
+ public ByteBuf getBytes(int index, byte[] dst, int dstIndex, int length) {
+ buffer.getBytes(index, dst, dstIndex, length);
+ return this;
+ }
+
+ @Override
+ public ByteBuf getBytes(int index, ByteBuffer dst) {
+ buffer.getBytes(index, dst);
+ return this;
+ }
+
+ @Override
+ public ByteBuf setByte(int index, int value) {
+ _setByte(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setByte(int index, int value) {
+ buffer.setByte(index, value);
+ }
+
+ @Override
+ public ByteBuf setShort(int index, int value) {
+ _setShort(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setShort(int index, int value) {
+ buffer.setShort(index, value);
+ }
+
+ @Override
+ public ByteBuf setShortLE(int index, int value) {
+ buffer.setShortLE(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setShortLE(int index, int value) {
+ buffer.setShortLE(index, value);
+ }
+
+ @Override
+ public ByteBuf setMedium(int index, int value) {
+ _setMedium(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setMedium(int index, int value) {
+ buffer.setMedium(index, value);
+ }
+
+ @Override
+ public ByteBuf setMediumLE(int index, int value) {
+ buffer.setMediumLE(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setMediumLE(int index, int value) {
+ buffer.setMediumLE(index, value);
+ }
+
+ @Override
+ public ByteBuf setInt(int index, int value) {
+ _setInt(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setInt(int index, int value) {
+ buffer.setInt(index, value);
+ }
+
+ @Override
+ public ByteBuf setIntLE(int index, int value) {
+ buffer.setIntLE(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setIntLE(int index, int value) {
+ buffer.setIntLE(index, value);
+ }
+
+ @Override
+ public ByteBuf setLong(int index, long value) {
+ _setLong(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setLong(int index, long value) {
+ buffer.setLong(index, value);
+ }
+
+ @Override
+ public ByteBuf setLongLE(int index, long value) {
+ buffer.setLongLE(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setLongLE(int index, long value) {
+ buffer.setLongLE(index, value);
+ }
+
+ @Override
+ public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) {
+ buffer.setBytes(index, src, srcIndex, length);
+ return this;
+ }
+
+ @Override
+ public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) {
+ buffer.setBytes(index, src, srcIndex, length);
+ return this;
+ }
+
+ @Override
+ public ByteBuf setBytes(int index, ByteBuffer src) {
+ buffer.setBytes(index, src);
+ return this;
+ }
+
+ @Override
+ public int setBytes(int index, FileChannel in, long position, int length)
+ throws IOException {
+ return buffer.setBytes(index, in, position, length);
+ }
+
+ @Override
+ public ByteBuf getBytes(int index, OutputStream out, int length)
+ throws IOException {
+ buffer.getBytes(index, out, length);
+ return this;
+ }
+
+ @Override
+ public int getBytes(int index, GatheringByteChannel out, int length)
+ throws IOException {
+ return buffer.getBytes(index, out, length);
+ }
+
+ @Override
+ public int setBytes(int index, InputStream in, int length)
+ throws IOException {
+ return buffer.setBytes(index, in, length);
+ }
+
+ @Override
+ public int setBytes(int index, ScatteringByteChannel in, int length)
+ throws IOException {
+ return buffer.setBytes(index, in, length);
+ }
+
+
+ @Override
+ public int getBytes(int index, FileChannel out, long position, int length)
+ throws IOException {
+ return buffer.getBytes(index, out, position, length);
+ }
+
+ @Override
+ public int nioBufferCount() {
+ return buffer.nioBufferCount();
+ }
+
+ @Override
+ public ByteBuffer[] nioBuffers(int index, int length) {
+ return buffer.nioBuffers(index, length);
+ }
+
+ @Override
+ public ByteBuffer internalNioBuffer(int index, int length) {
+ return nioBuffer(index, length);
+ }
+
+ @Override
+ public int forEachByte(int index, int length, ByteProcessor processor) {
+ return buffer.forEachByte(index, length, processor);
+ }
+
+ @Override
+ public int forEachByteDesc(int index, int length, ByteProcessor processor) {
+ return buffer.forEachByteDesc(index, length, processor);
+ }
+
+ @Override
+ public final int refCnt() {
+ return unwrap().refCnt();
+ }
+
+ @Override
+ public final ByteBuf touch() {
+ unwrap().touch();
+ return this;
+ }
+
+ @Override
+ public final ByteBuf touch(Object hint) {
+ unwrap().touch(hint);
+ return this;
+ }
+
+ @Override
+ public final ByteBuf retain() {
+ unwrap().retain();
+ return this;
+ }
+
+ @Override
+ public final ByteBuf retain(int increment) {
+ unwrap().retain(increment);
+ return this;
+ }
+
+ @Override
+ public boolean release() {
+ return release(1);
+ }
+
+ @Override
+ public boolean release(int decrement) {
+ boolean released = unwrap().release(decrement);
+ return released;
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/NettyArrowBuf.java b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/NettyArrowBuf.java
new file mode 100644
index 000000000..8681b005f
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/NettyArrowBuf.java
@@ -0,0 +1,622 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.netty.buffer;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.nio.channels.GatheringByteChannel;
+import java.nio.channels.ScatteringByteChannel;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.ArrowByteBufAllocator;
+import org.apache.arrow.memory.BoundsChecking;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Netty specific wrapper over ArrowBuf for use in Netty framework.
+ */
+public class NettyArrowBuf extends AbstractByteBuf implements AutoCloseable {
+
+ private final ArrowBuf arrowBuf;
+ private final ArrowByteBufAllocator arrowByteBufAllocator;
+ private int length;
+ private final long address;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param arrowBuf The buffer to wrap.
+ * @param bufferAllocator The allocator for the buffer.
+ * @param length The length of this buffer.
+ */
+ public NettyArrowBuf(
+ final ArrowBuf arrowBuf,
+ final BufferAllocator bufferAllocator,
+ final int length) {
+ super(length);
+ this.arrowBuf = arrowBuf;
+ this.arrowByteBufAllocator = new ArrowByteBufAllocator(bufferAllocator);
+ this.length = length;
+ this.address = arrowBuf.memoryAddress();
+ }
+
+ @Override
+ public ByteBuf copy() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ByteBuf copy(int index, int length) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ByteBuf retain() {
+ arrowBuf.getReferenceManager().retain();
+ return this;
+ }
+
+ public ArrowBuf arrowBuf() {
+ return arrowBuf;
+ }
+
+ @Override
+ public ByteBuf retain(final int increment) {
+ arrowBuf.getReferenceManager().retain(increment);
+ return this;
+ }
+
+ @Override
+ public boolean isDirect() {
+ return true;
+ }
+
+ @Override
+ public synchronized ByteBuf capacity(int newCapacity) {
+ if (newCapacity == length) {
+ return this;
+ }
+ Preconditions.checkArgument(newCapacity >= 0);
+ if (newCapacity < length) {
+ length = newCapacity;
+ return this;
+ }
+ throw new UnsupportedOperationException("Buffers don't support resizing that increases the size.");
+ }
+
+ @Override
+ public ByteBuf unwrap() {
+ throw new UnsupportedOperationException("Unwrap not supported.");
+ }
+
+ @Override
+ public int refCnt() {
+ return arrowBuf.getReferenceManager().getRefCount();
+ }
+
+ @Override
+ public ArrowByteBufAllocator alloc() {
+ return arrowByteBufAllocator;
+ }
+
+ @Override
+ public boolean hasArray() {
+ return false;
+ }
+
+ @Override
+ public byte[] array() {
+ throw new UnsupportedOperationException("Operation not supported on direct buffer");
+ }
+
+ @Override
+ public int arrayOffset() {
+ throw new UnsupportedOperationException("Operation not supported on direct buffer");
+ }
+
+ @Override
+ public boolean hasMemoryAddress() {
+ return true;
+ }
+
+ @Override
+ public long memoryAddress() {
+ return this.address;
+ }
+
+ @Override
+ public ByteBuf touch() {
+ return this;
+ }
+
+ @Override
+ public ByteBuf touch(Object hint) {
+ return this;
+ }
+
+ @Override
+ public int capacity() {
+ return (int) Math.min(Integer.MAX_VALUE, arrowBuf.capacity());
+ }
+
+ @Override
+ public NettyArrowBuf slice() {
+ return unwrapBuffer(arrowBuf.slice(readerIndex, writerIndex - readerIndex));
+ }
+
+ @Override
+ public NettyArrowBuf slice(int index, int length) {
+ return unwrapBuffer(arrowBuf.slice(index, length));
+ }
+
+ @Override
+ public void close() {
+ arrowBuf.close();
+ }
+
+ @Override
+ public boolean release() {
+ return arrowBuf.getReferenceManager().release();
+ }
+
+ @Override
+ public boolean release(int decrement) {
+ return arrowBuf.getReferenceManager().release(decrement);
+ }
+
+ @Override
+ public NettyArrowBuf readerIndex(int readerIndex) {
+ super.readerIndex(readerIndex);
+ return this;
+ }
+
+ @Override
+ public NettyArrowBuf writerIndex(int writerIndex) {
+ super.writerIndex(writerIndex);
+ return this;
+ }
+
+ @Override
+ public int nioBufferCount() {
+ return 1;
+ }
+
+ @Override
+ public ByteBuffer internalNioBuffer(int index, int length) {
+ ByteBuffer nioBuf = getDirectBuffer(index);
+ // Follows convention from other ByteBuf implementations.
+ return (ByteBuffer) nioBuf.clear().limit(length);
+ }
+
+ @Override
+ public ByteBuffer[] nioBuffers() {
+ return new ByteBuffer[] {nioBuffer()};
+ }
+
+ @Override
+ public ByteBuffer[] nioBuffers(int index, int length) {
+ return new ByteBuffer[] {nioBuffer(index, length)};
+ }
+
+ @Override
+ public ByteBuffer nioBuffer() {
+ return nioBuffer(readerIndex(), readableBytes());
+ }
+
+
+ /**
+ * Returns a buffer that is zero positioned but points
+ * to a slice of the original buffer starting at given index.
+ */
+ @Override
+ public ByteBuffer nioBuffer(int index, int length) {
+ chk(index, length);
+ final ByteBuffer buffer = getDirectBuffer(index);
+ buffer.limit(length);
+ return buffer;
+ }
+
+ /**
+ * Returns a buffer that is zero positioned but points
+ * to a slice of the original buffer starting at given index.
+ */
+ public ByteBuffer nioBuffer(long index, int length) {
+ chk(index, length);
+ final ByteBuffer buffer = getDirectBuffer(index);
+ buffer.limit(length);
+ return buffer;
+ }
+
+ /**
+ * Get this ArrowBuf as a direct {@link ByteBuffer}.
+ *
+ * @return ByteBuffer
+ */
+ private ByteBuffer getDirectBuffer(long index) {
+ return PlatformDependent.directBuffer(addr(index), checkedCastToInt(length - index));
+ }
+
+ @Override
+ public ByteBuf getBytes(int index, ByteBuffer dst) {
+ arrowBuf.getBytes(index, dst);
+ return this;
+ }
+
+ @Override
+ public ByteBuf setBytes(int index, ByteBuffer src) {
+ arrowBuf.setBytes(index, src);
+ return this;
+ }
+
+ @Override
+ public ByteBuf getBytes(int index, byte[] dst, int dstIndex, int length) {
+ arrowBuf.getBytes(index, dst, dstIndex, length);
+ return this;
+ }
+
+ @Override
+ public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) {
+ arrowBuf.setBytes(index, src, srcIndex, length);
+ return this;
+ }
+
+ /**
+ * Determine if the requested {@code index} and {@code length} will fit within {@code capacity}.
+ *
+ * @param index The starting index.
+ * @param length The length which will be utilized (starting from {@code index}).
+ * @param capacity The capacity that {@code index + length} is allowed to be within.
+ * @return {@code true} if the requested {@code index} and {@code length} will fit within {@code capacity}.
+ * {@code false} if this would result in an index out of bounds exception.
+ */
+ private static boolean isOutOfBounds(int index, int length, int capacity) {
+ return (index | length | (index + length) | (capacity - (index + length))) < 0;
+ }
+
+ @Override
+ public ByteBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) {
+ chk(index, length);
+ Preconditions.checkArgument(dst != null, "Expecting valid dst ByteBuffer");
+ if (isOutOfBounds(dstIndex, length, dst.capacity())) {
+ throw new IndexOutOfBoundsException("dstIndex: " + dstIndex + " length: " + length);
+ } else {
+ final long srcAddress = addr(index);
+ if (dst.hasMemoryAddress()) {
+ final long dstAddress = dst.memoryAddress() + (long) dstIndex;
+ PlatformDependent.copyMemory(srcAddress, dstAddress, (long) length);
+ } else if (dst.hasArray()) {
+ dstIndex += dst.arrayOffset();
+ PlatformDependent.copyMemory(srcAddress, dst.array(), dstIndex, (long) length);
+ } else {
+ dst.setBytes(dstIndex, this, index, length);
+ }
+ }
+ return this;
+ }
+
+ @Override
+ public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) {
+ chk(index, length);
+ Preconditions.checkArgument(src != null, "Expecting valid src ByteBuffer");
+ if (isOutOfBounds(srcIndex, length, src.capacity())) {
+ throw new IndexOutOfBoundsException("srcIndex: " + srcIndex + " length: " + length);
+ } else {
+ if (length != 0) {
+ final long dstAddress = addr(index);
+ if (src.hasMemoryAddress()) {
+ final long srcAddress = src.memoryAddress() + (long) srcIndex;
+ PlatformDependent.copyMemory(srcAddress, dstAddress, (long) length);
+ } else if (src.hasArray()) {
+ srcIndex += src.arrayOffset();
+ PlatformDependent.copyMemory(src.array(), srcIndex, dstAddress, (long) length);
+ } else {
+ src.getBytes(srcIndex, this, index, length);
+ }
+ }
+ }
+ return this;
+ }
+
+ @Override
+ public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException {
+ arrowBuf.getBytes(index, out, length);
+ return this;
+ }
+
+ @Override
+ public int setBytes(int index, InputStream in, int length) throws IOException {
+ return arrowBuf.setBytes(index, in, length);
+ }
+
+ @Override
+ public int getBytes(int index, GatheringByteChannel out, int length) throws IOException {
+ Preconditions.checkArgument(out != null, "expecting valid gathering byte channel");
+ chk(index, length);
+ if (length == 0) {
+ return 0;
+ } else {
+ final ByteBuffer tmpBuf = getDirectBuffer(index);
+ tmpBuf.clear().limit(length);
+ return out.write(tmpBuf);
+ }
+ }
+
+ @Override
+ public int getBytes(int index, FileChannel out, long position, int length) throws IOException {
+ chk(index, length);
+ if (length == 0) {
+ return 0;
+ } else {
+ final ByteBuffer tmpBuf = getDirectBuffer(index);
+ tmpBuf.clear().limit(length);
+ return out.write(tmpBuf, position);
+ }
+ }
+
+ @Override
+ public int setBytes(int index, ScatteringByteChannel in, int length) throws IOException {
+ return (int) in.read(nioBuffers(index, length));
+ }
+
+ @Override
+ public int setBytes(int index, FileChannel in, long position, int length) throws IOException {
+ return (int) in.read(nioBuffers(index, length));
+ }
+
+ @Override
+ public ByteOrder order() {
+ return ByteOrder.LITTLE_ENDIAN;
+ }
+
+ @Override
+ public ByteBuf order(ByteOrder endianness) {
+ return this;
+ }
+
+ @Override
+ protected int _getUnsignedMedium(int index) {
+ return getUnsignedMedium(index);
+ }
+
+ @Override
+ protected int _getUnsignedMediumLE(int index) {
+ this.chk(index, 3);
+ long addr = this.addr(index);
+ return PlatformDependent.getByte(addr) & 255 |
+ (Short.reverseBytes(PlatformDependent.getShort(addr + 1L)) & '\uffff') << 8;
+ }
+
+
+ /*-------------------------------------------------*
+ | |
+ | get() APIs |
+ | |
+ *-------------------------------------------------*/
+
+
+ @Override
+ protected byte _getByte(int index) {
+ return getByte(index);
+ }
+
+ @Override
+ public byte getByte(int index) {
+ return arrowBuf.getByte(index);
+ }
+
+ @Override
+ protected short _getShortLE(int index) {
+ short s = getShort(index);
+ return Short.reverseBytes(s);
+ }
+
+ @Override
+ protected short _getShort(int index) {
+ return getShort(index);
+ }
+
+ @Override
+ public short getShort(int index) {
+ return arrowBuf.getShort(index);
+ }
+
+ @Override
+ protected int _getIntLE(int index) {
+ int value = getInt(index);
+ return Integer.reverseBytes(value);
+ }
+
+ @Override
+ protected int _getInt(int index) {
+ return getInt(index);
+ }
+
+ @Override
+ public int getInt(int index) {
+ return arrowBuf.getInt(index);
+ }
+
+ @Override
+ protected long _getLongLE(int index) {
+ long value = getLong(index);
+ return Long.reverseBytes(value);
+ }
+
+ @Override
+ protected long _getLong(int index) {
+ return getLong(index);
+ }
+
+ @Override
+ public long getLong(int index) {
+ return arrowBuf.getLong(index);
+ }
+
+
+ /*-------------------------------------------------*
+ | |
+ | set() APIs |
+ | |
+ *-------------------------------------------------*/
+
+
+ @Override
+ protected void _setByte(int index, int value) {
+ setByte(index, value);
+ }
+
+ @Override
+ public NettyArrowBuf setByte(int index, int value) {
+ arrowBuf.setByte(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setShortLE(int index, int value) {
+ this.chk(index, 2);
+ PlatformDependent.putShort(this.addr(index), Short.reverseBytes((short) value));
+ }
+
+ @Override
+ protected void _setShort(int index, int value) {
+ setShort(index, value);
+ }
+
+ @Override
+ public NettyArrowBuf setShort(int index, int value) {
+ arrowBuf.setShort(index, value);
+ return this;
+ }
+
+ private long addr(long index) {
+ return address + index;
+ }
+
+ /**
+ * Helper function to do bounds checking at a particular
+ * index for particular length of data.
+ *
+ * @param index index (0 based relative to this ArrowBuf)
+ * @param fieldLength provided length of data for get/set
+ */
+ private void chk(long index, long fieldLength) {
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ // check reference count
+ ensureAccessible();
+ // check bounds
+ if (fieldLength < 0) {
+ throw new IllegalArgumentException("length: " + fieldLength + " (expected: >= 0)");
+ }
+ if (index < 0 || index > capacity() - fieldLength) {
+ throw new IndexOutOfBoundsException(String.format(
+ "index: %d, length: %d (expected: range(0, %d))", index, fieldLength, capacity()));
+ }
+ }
+ }
+
+ @Override
+ protected void _setMedium(int index, int value) {
+ setMedium(index, value);
+ }
+
+ @Override
+ protected void _setMediumLE(int index, int value) {
+ this.chk(index, 3);
+ long addr = this.addr(index);
+ PlatformDependent.putByte(addr, (byte) value);
+ PlatformDependent.putShort(addr + 1L, Short.reverseBytes((short) (value >>> 8)));
+ }
+
+ @Override
+ public NettyArrowBuf setMedium(int index, int value) {
+ chk(index, 3);
+ final long addr = addr(index);
+ // we need to store 3 bytes starting from least significant byte
+ // and ignoring the most significant byte
+ // since arrow memory format is little endian, we will
+ // first store the first 2 bytes followed by third byte
+ // example: if the 4 byte int value is ABCD where A is MSB
+ // D is LSB then we effectively want to store DCB in increasing
+ // address to get Little Endian byte order
+ // (short)value will give us CD and PlatformDependent.putShort()
+ // will store them in LE order as DC starting at address addr
+ // in order to get B, we do ABCD >>> 16 = 00AB => (byte)AB which
+ // gives B. We store this at address addr + 2. So finally we get
+ // DCB
+ PlatformDependent.putShort(addr, (short) value);
+ PlatformDependent.putByte(addr + 2, (byte) (value >>> 16));
+ return this;
+ }
+
+ @Override
+ protected void _setInt(int index, int value) {
+ setInt(index, value);
+ }
+
+ @Override
+ protected void _setIntLE(int index, int value) {
+ this.chk(index, 4);
+ PlatformDependent.putInt(this.addr(index), Integer.reverseBytes(value));
+ }
+
+ @Override
+ public NettyArrowBuf setInt(int index, int value) {
+ arrowBuf.setInt(index, value);
+ return this;
+ }
+
+ @Override
+ protected void _setLong(int index, long value) {
+ setLong(index, value);
+ }
+
+ @Override
+ public void _setLongLE(int index, long value) {
+ this.chk(index, 8);
+ PlatformDependent.putLong(this.addr(index), Long.reverseBytes(value));
+ }
+
+ @Override
+ public NettyArrowBuf setLong(int index, long value) {
+ arrowBuf.setLong(index, value);
+ return this;
+ }
+
+ /**
+ * unwrap arrow buffer into a netty buffer.
+ */
+ public static NettyArrowBuf unwrapBuffer(ArrowBuf buf) {
+ final NettyArrowBuf nettyArrowBuf = new NettyArrowBuf(
+ buf,
+ buf.getReferenceManager().getAllocator(),
+ checkedCastToInt(buf.capacity()));
+ nettyArrowBuf.readerIndex(checkedCastToInt(buf.readerIndex()));
+ nettyArrowBuf.writerIndex(checkedCastToInt(buf.writerIndex()));
+ return nettyArrowBuf;
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java
new file mode 100644
index 000000000..d0a5a9945
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.netty.buffer;
+
+import static org.apache.arrow.memory.util.AssertionUtil.ASSERT_ENABLED;
+
+import java.lang.reflect.Field;
+import java.nio.ByteBuffer;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.LargeMemoryUtil;
+
+import io.netty.util.internal.OutOfDirectMemoryError;
+import io.netty.util.internal.StringUtil;
+
+/**
+ * The base allocator that we use for all of Arrow's memory management. Returns
+ * UnsafeDirectLittleEndian buffers.
+ */
+public class PooledByteBufAllocatorL {
+
+ private static final org.slf4j.Logger memoryLogger = org.slf4j.LoggerFactory.getLogger("arrow.allocator");
+
+ private static final int MEMORY_LOGGER_FREQUENCY_SECONDS = 60;
+ public final UnsafeDirectLittleEndian empty;
+ private final AtomicLong hugeBufferSize = new AtomicLong(0);
+ private final AtomicLong hugeBufferCount = new AtomicLong(0);
+ private final AtomicLong normalBufferSize = new AtomicLong(0);
+ private final AtomicLong normalBufferCount = new AtomicLong(0);
+ private final InnerAllocator allocator;
+
+ public PooledByteBufAllocatorL() {
+ allocator = new InnerAllocator();
+ empty = new UnsafeDirectLittleEndian(new DuplicatedByteBuf(Unpooled.EMPTY_BUFFER));
+ }
+
+ /**
+ * Returns a {@linkplain io.netty.buffer.UnsafeDirectLittleEndian} of the given size.
+ */
+ public UnsafeDirectLittleEndian allocate(long size) {
+ try {
+ return allocator.directBuffer(LargeMemoryUtil.checkedCastToInt(size), Integer.MAX_VALUE);
+ } catch (OutOfMemoryError e) {
+ /*
+ * OutOfDirectMemoryError is thrown by Netty when we exceed the direct memory limit defined by
+ * -XX:MaxDirectMemorySize. OutOfMemoryError with "Direct buffer memory" message is thrown by
+ * java.nio.Bits when we exceed the direct memory limit. This should never be hit in practice
+ * as Netty is expected to throw an OutOfDirectMemoryError first.
+ */
+ if (e instanceof OutOfDirectMemoryError || "Direct buffer memory".equals(e.getMessage())) {
+ throw new OutOfMemoryException("Failure allocating buffer.", e);
+ }
+ throw e;
+ }
+ }
+
+ public int getChunkSize() {
+ return allocator.chunkSize;
+ }
+
+ public long getHugeBufferSize() {
+ return hugeBufferSize.get();
+ }
+
+ public long getHugeBufferCount() {
+ return hugeBufferCount.get();
+ }
+
+ public long getNormalBufferSize() {
+ return normalBufferSize.get();
+ }
+
+ public long getNormalBufferCount() {
+ return normalBufferSize.get();
+ }
+
+ private static class AccountedUnsafeDirectLittleEndian extends UnsafeDirectLittleEndian {
+
+ private final long initialCapacity;
+ private final AtomicLong count;
+ private final AtomicLong size;
+
+ private AccountedUnsafeDirectLittleEndian(LargeBuffer buf, AtomicLong count, AtomicLong size) {
+ super(buf);
+ this.initialCapacity = buf.capacity();
+ this.count = count;
+ this.size = size;
+ }
+
+ private AccountedUnsafeDirectLittleEndian(PooledUnsafeDirectByteBuf buf, AtomicLong count,
+ AtomicLong size) {
+ super(buf);
+ this.initialCapacity = buf.capacity();
+ this.count = count;
+ this.size = size;
+ }
+
+ @Override
+ public ByteBuf copy() {
+ throw new UnsupportedOperationException("copy method is not supported");
+ }
+
+ @Override
+ public ByteBuf copy(int index, int length) {
+ throw new UnsupportedOperationException("copy method is not supported");
+ }
+
+ @Override
+ public boolean release(int decrement) {
+ boolean released = super.release(decrement);
+ if (released) {
+ count.decrementAndGet();
+ size.addAndGet(-initialCapacity);
+ }
+ return released;
+ }
+
+ }
+
+ private class InnerAllocator extends PooledByteBufAllocator {
+
+ private final PoolArena<ByteBuffer>[] directArenas;
+ private final MemoryStatusThread statusThread;
+ private final int chunkSize;
+
+ public InnerAllocator() {
+ super(true);
+
+ try {
+ Field f = PooledByteBufAllocator.class.getDeclaredField("directArenas");
+ f.setAccessible(true);
+ this.directArenas = (PoolArena<ByteBuffer>[]) f.get(this);
+ } catch (Exception e) {
+ throw new RuntimeException("Failure while initializing allocator. Unable to retrieve direct arenas field.", e);
+ }
+
+ this.chunkSize = directArenas[0].chunkSize;
+
+ if (memoryLogger.isTraceEnabled()) {
+ statusThread = new MemoryStatusThread();
+ statusThread.start();
+ } else {
+ statusThread = null;
+ }
+ }
+
+ private UnsafeDirectLittleEndian newDirectBufferL(int initialCapacity, int maxCapacity) {
+ PoolThreadCache cache = threadCache();
+ PoolArena<ByteBuffer> directArena = cache.directArena;
+
+ if (directArena != null) {
+
+ if (initialCapacity > directArena.chunkSize) {
+ // This is beyond chunk size so we'll allocate separately.
+ ByteBuf buf = UnpooledByteBufAllocator.DEFAULT.directBuffer(initialCapacity, maxCapacity);
+
+ hugeBufferSize.addAndGet(buf.capacity());
+ hugeBufferCount.incrementAndGet();
+
+ // logger.debug("Allocating huge buffer of size {}", initialCapacity, new Exception());
+ return new AccountedUnsafeDirectLittleEndian(new LargeBuffer(buf), hugeBufferCount,
+ hugeBufferSize);
+ } else {
+ // within chunk, use arena.
+ ByteBuf buf = directArena.allocate(cache, initialCapacity, maxCapacity);
+ if (!(buf instanceof PooledUnsafeDirectByteBuf)) {
+ fail();
+ }
+
+ if (!ASSERT_ENABLED) {
+ return new UnsafeDirectLittleEndian((PooledUnsafeDirectByteBuf) buf);
+ }
+
+ normalBufferSize.addAndGet(buf.capacity());
+ normalBufferCount.incrementAndGet();
+
+ return new AccountedUnsafeDirectLittleEndian((PooledUnsafeDirectByteBuf) buf,
+ normalBufferCount, normalBufferSize);
+ }
+
+ } else {
+ throw fail();
+ }
+ }
+
+ private UnsupportedOperationException fail() {
+ return new UnsupportedOperationException(
+ "Arrow requires that the JVM used supports access sun.misc.Unsafe. This platform " +
+ "didn't provide that functionality.");
+ }
+
+ @Override
+ public UnsafeDirectLittleEndian directBuffer(int initialCapacity, int maxCapacity) {
+ if (initialCapacity == 0 && maxCapacity == 0) {
+ newDirectBuffer(initialCapacity, maxCapacity);
+ }
+ validate(initialCapacity, maxCapacity);
+ return newDirectBufferL(initialCapacity, maxCapacity);
+ }
+
+ @Override
+ public ByteBuf heapBuffer(int initialCapacity, int maxCapacity) {
+ throw new UnsupportedOperationException("Arrow doesn't support using heap buffers.");
+ }
+
+
+ private void validate(int initialCapacity, int maxCapacity) {
+ if (initialCapacity < 0) {
+ throw new IllegalArgumentException("initialCapacity: " + initialCapacity + " (expected: 0+)");
+ }
+ if (initialCapacity > maxCapacity) {
+ throw new IllegalArgumentException(String.format(
+ "initialCapacity: %d (expected: not greater than maxCapacity(%d)",
+ initialCapacity, maxCapacity));
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(directArenas.length);
+ buf.append(" direct arena(s):");
+ buf.append(StringUtil.NEWLINE);
+ for (PoolArena<ByteBuffer> a : directArenas) {
+ buf.append(a);
+ }
+
+ buf.append("Large buffers outstanding: ");
+ buf.append(hugeBufferCount.get());
+ buf.append(" totaling ");
+ buf.append(hugeBufferSize.get());
+ buf.append(" bytes.");
+ buf.append('\n');
+ buf.append("Normal buffers outstanding: ");
+ buf.append(normalBufferCount.get());
+ buf.append(" totaling ");
+ buf.append(normalBufferSize.get());
+ buf.append(" bytes.");
+ return buf.toString();
+ }
+
+ private class MemoryStatusThread extends Thread {
+
+ public MemoryStatusThread() {
+ super("allocation.logger");
+ this.setDaemon(true);
+ }
+
+ @Override
+ public void run() {
+ while (true) {
+ memoryLogger.trace("Memory Usage: \n{}", PooledByteBufAllocatorL.this.toString());
+ try {
+ Thread.sleep(MEMORY_LOGGER_FREQUENCY_SECONDS * 1000);
+ } catch (InterruptedException e) {
+ return;
+ }
+ }
+ }
+ }
+
+
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java
new file mode 100644
index 000000000..e900b1ca7
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.netty.buffer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteOrder;
+import java.util.concurrent.atomic.AtomicLong;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * The underlying class we use for little-endian access to memory. Is used underneath ArrowBufs
+ * to abstract away the
+ * Netty classes and underlying Netty memory management.
+ */
+public class UnsafeDirectLittleEndian extends WrappedByteBuf {
+
+ public static final boolean ASSERT_ENABLED;
+ private static final AtomicLong ID_GENERATOR = new AtomicLong(0);
+
+ static {
+ boolean isAssertEnabled = false;
+ assert isAssertEnabled = true;
+ ASSERT_ENABLED = isAssertEnabled;
+ }
+
+ public final long id = ID_GENERATOR.incrementAndGet();
+ private final AbstractByteBuf wrapped;
+ private final long memoryAddress;
+
+ UnsafeDirectLittleEndian(DuplicatedByteBuf buf) {
+ this(buf, true);
+ }
+
+ UnsafeDirectLittleEndian(LargeBuffer buf) {
+ this(buf, true);
+ }
+
+ UnsafeDirectLittleEndian(PooledUnsafeDirectByteBuf buf) {
+ this(buf, true);
+ }
+
+ private UnsafeDirectLittleEndian(AbstractByteBuf buf, boolean fake) {
+ super(buf);
+
+ this.wrapped = buf;
+ this.memoryAddress = buf.memoryAddress();
+ }
+
+ private long addr(int index) {
+ return memoryAddress + index;
+ }
+
+ @Override
+ public long getLong(int index) {
+ // wrapped.checkIndex(index, 8);
+ long v = PlatformDependent.getLong(addr(index));
+ return v;
+ }
+
+ @Override
+ public float getFloat(int index) {
+ return Float.intBitsToFloat(getInt(index));
+ }
+
+ @Override
+ public ByteBuf slice() {
+ return slice(this.readerIndex(), readableBytes());
+ }
+
+ @Override
+ public ByteBuf slice(int index, int length) {
+ return new SlicedByteBuf(this, index, length);
+ }
+
+ @Override
+ public ByteBuf order(ByteOrder endianness) {
+ return this;
+ }
+
+ @Override
+ public double getDouble(int index) {
+ return Double.longBitsToDouble(getLong(index));
+ }
+
+ @Override
+ public char getChar(int index) {
+ return (char) getShort(index);
+ }
+
+ @Override
+ public long getUnsignedInt(int index) {
+ return getInt(index) & 0xFFFFFFFFL;
+ }
+
+ @Override
+ public int getInt(int index) {
+ int v = PlatformDependent.getInt(addr(index));
+ return v;
+ }
+
+ @Override
+ public int getUnsignedShort(int index) {
+ return getShort(index) & 0xFFFF;
+ }
+
+ @Override
+ public short getShort(int index) {
+ short v = PlatformDependent.getShort(addr(index));
+ return v;
+ }
+
+ @Override
+ public ByteBuf setShort(int index, int value) {
+ wrapped.checkIndex(index, 2);
+ setShort_(index, value);
+ return this;
+ }
+
+ @Override
+ public ByteBuf setInt(int index, int value) {
+ wrapped.checkIndex(index, 4);
+ setInt_(index, value);
+ return this;
+ }
+
+ @Override
+ public ByteBuf setLong(int index, long value) {
+ wrapped.checkIndex(index, 8);
+ setLong_(index, value);
+ return this;
+ }
+
+ @Override
+ public ByteBuf setChar(int index, int value) {
+ setShort(index, value);
+ return this;
+ }
+
+ @Override
+ public ByteBuf setFloat(int index, float value) {
+ setInt(index, Float.floatToRawIntBits(value));
+ return this;
+ }
+
+ @Override
+ public ByteBuf setDouble(int index, double value) {
+ setLong(index, Double.doubleToRawLongBits(value));
+ return this;
+ }
+
+ @Override
+ public ByteBuf writeShort(int value) {
+ wrapped.ensureWritable(2);
+ setShort_(wrapped.writerIndex, value);
+ wrapped.writerIndex += 2;
+ return this;
+ }
+
+ @Override
+ public ByteBuf writeInt(int value) {
+ wrapped.ensureWritable(4);
+ setInt_(wrapped.writerIndex, value);
+ wrapped.writerIndex += 4;
+ return this;
+ }
+
+ @Override
+ public ByteBuf writeLong(long value) {
+ wrapped.ensureWritable(8);
+ setLong_(wrapped.writerIndex, value);
+ wrapped.writerIndex += 8;
+ return this;
+ }
+
+ @Override
+ public ByteBuf writeChar(int value) {
+ writeShort(value);
+ return this;
+ }
+
+ @Override
+ public ByteBuf writeFloat(float value) {
+ writeInt(Float.floatToRawIntBits(value));
+ return this;
+ }
+
+ @Override
+ public ByteBuf writeDouble(double value) {
+ writeLong(Double.doubleToRawLongBits(value));
+ return this;
+ }
+
+ private void setShort_(int index, int value) {
+ PlatformDependent.putShort(addr(index), (short) value);
+ }
+
+ private void setInt_(int index, int value) {
+ PlatformDependent.putInt(addr(index), value);
+ }
+
+ private void setLong_(int index, long value) {
+ PlatformDependent.putLong(addr(index), value);
+ }
+
+ @Override
+ public byte getByte(int index) {
+ return PlatformDependent.getByte(addr(index));
+ }
+
+ @Override
+ public ByteBuf setByte(int index, int value) {
+ PlatformDependent.putByte(addr(index), (byte) value);
+ return this;
+ }
+
+ @Override
+ public boolean release() {
+ return release(1);
+ }
+
+ @Override
+ public int setBytes(int index, InputStream in, int length) throws IOException {
+ wrapped.checkIndex(index, length);
+ byte[] tmp = new byte[length];
+ int readBytes = in.read(tmp);
+ if (readBytes > 0) {
+ PlatformDependent.copyMemory(tmp, 0, addr(index), readBytes);
+ }
+ return readBytes;
+ }
+
+ @Override
+ public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException {
+ wrapped.checkIndex(index, length);
+ if (length != 0) {
+ byte[] tmp = new byte[length];
+ PlatformDependent.copyMemory(addr(index), tmp, 0, length);
+ out.write(tmp);
+ }
+ return this;
+ }
+
+ @Override
+ public int hashCode() {
+ return System.identityHashCode(this);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return this == obj;
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java b/src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java
new file mode 100644
index 000000000..ff40b49ff
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/ArrowByteBufAllocator.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import io.netty.buffer.AbstractByteBufAllocator;
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.CompositeByteBuf;
+import io.netty.buffer.ExpandableByteBuf;
+import io.netty.buffer.NettyArrowBuf;
+
+/**
+ * An implementation of ByteBufAllocator that wraps a Arrow BufferAllocator. This allows the RPC
+ * layer to be accounted
+ * and managed using Arrow's BufferAllocator infrastructure. The only thin different from a
+ * typical BufferAllocator is
+ * the signature and the fact that this Allocator returns ExpandableByteBufs which enable
+ * otherwise non-expandable
+ * ArrowBufs to be expandable.
+ *
+ * @deprecated This class may be removed in a future release.
+ */
+@Deprecated
+public class ArrowByteBufAllocator extends AbstractByteBufAllocator {
+
+ private static final int DEFAULT_BUFFER_SIZE = 4096;
+ private static final int DEFAULT_MAX_COMPOSITE_COMPONENTS = 16;
+
+ private final BufferAllocator allocator;
+
+ public ArrowByteBufAllocator(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ public BufferAllocator unwrap() {
+ return allocator;
+ }
+
+ @Override
+ public ByteBuf buffer() {
+ return buffer(DEFAULT_BUFFER_SIZE);
+ }
+
+ @Override
+ public ByteBuf buffer(int initialCapacity) {
+ return new ExpandableByteBuf(NettyArrowBuf.unwrapBuffer(allocator.buffer(initialCapacity)), allocator);
+ }
+
+ @Override
+ public ByteBuf buffer(int initialCapacity, int maxCapacity) {
+ return buffer(initialCapacity);
+ }
+
+ @Override
+ public ByteBuf ioBuffer() {
+ return buffer();
+ }
+
+ @Override
+ public ByteBuf ioBuffer(int initialCapacity) {
+ return buffer(initialCapacity);
+ }
+
+ @Override
+ public ByteBuf ioBuffer(int initialCapacity, int maxCapacity) {
+ return buffer(initialCapacity);
+ }
+
+ @Override
+ public ByteBuf directBuffer() {
+ return buffer();
+ }
+
+ @Override
+ public ByteBuf directBuffer(int initialCapacity) {
+ return NettyArrowBuf.unwrapBuffer(allocator.buffer(initialCapacity));
+ }
+
+ @Override
+ public ByteBuf directBuffer(int initialCapacity, int maxCapacity) {
+ return buffer(initialCapacity, maxCapacity);
+ }
+
+ @Override
+ public CompositeByteBuf compositeBuffer() {
+ return compositeBuffer(DEFAULT_MAX_COMPOSITE_COMPONENTS);
+ }
+
+ @Override
+ public CompositeByteBuf compositeBuffer(int maxNumComponents) {
+ return new CompositeByteBuf(this, true, maxNumComponents);
+ }
+
+ @Override
+ public CompositeByteBuf compositeDirectBuffer() {
+ return compositeBuffer();
+ }
+
+ @Override
+ public CompositeByteBuf compositeDirectBuffer(int maxNumComponents) {
+ return compositeBuffer(maxNumComponents);
+ }
+
+ @Override
+ public boolean isDirectBufferPooled() {
+ return false;
+ }
+
+ @Override
+ public ByteBuf heapBuffer() {
+ throw fail();
+ }
+
+ @Override
+ public ByteBuf heapBuffer(int initialCapacity) {
+ throw fail();
+ }
+
+ @Override
+ public ByteBuf heapBuffer(int initialCapacity, int maxCapacity) {
+ throw fail();
+ }
+
+ @Override
+ public CompositeByteBuf compositeHeapBuffer() {
+ throw fail();
+ }
+
+ @Override
+ public CompositeByteBuf compositeHeapBuffer(int maxNumComponents) {
+ throw fail();
+ }
+
+ @Override
+ protected ByteBuf newHeapBuffer(int initialCapacity, int maxCapacity) {
+ throw fail();
+ }
+
+ @Override
+ protected ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity) {
+ return buffer(initialCapacity, maxCapacity);
+ }
+
+ private RuntimeException fail() {
+ throw new UnsupportedOperationException("Allocator doesn't support heap-based memory.");
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java b/src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java
new file mode 100644
index 000000000..10cfb5c16
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * The default Allocation Manager Factory for a module.
+ *
+ */
+public class DefaultAllocationManagerFactory implements AllocationManager.Factory {
+
+ public static final AllocationManager.Factory FACTORY = NettyAllocationManager.FACTORY;
+
+ @Override
+ public AllocationManager create(BufferAllocator accountingAllocator, long size) {
+ return FACTORY.create(accountingAllocator, size);
+ }
+
+ @Override
+ public ArrowBuf empty() {
+ return FACTORY.empty();
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/NettyAllocationManager.java b/src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/NettyAllocationManager.java
new file mode 100644
index 000000000..200047783
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/NettyAllocationManager.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import io.netty.buffer.PooledByteBufAllocatorL;
+import io.netty.buffer.UnsafeDirectLittleEndian;
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * The default implementation of {@link AllocationManager}. The implementation is responsible for managing when memory
+ * is allocated and returned to the Netty-based PooledByteBufAllocatorL.
+ */
+public class NettyAllocationManager extends AllocationManager {
+
+ public static final AllocationManager.Factory FACTORY = new AllocationManager.Factory() {
+
+ @Override
+ public AllocationManager create(BufferAllocator accountingAllocator, long size) {
+ return new NettyAllocationManager(accountingAllocator, size);
+ }
+
+ @Override
+ public ArrowBuf empty() {
+ return EMPTY_BUFFER;
+ }
+ };
+
+ /**
+ * The default cut-off value for switching allocation strategies.
+ * If the request size is not greater than the cut-off value, we will allocate memory by
+ * {@link PooledByteBufAllocatorL} APIs,
+ * otherwise, we will use {@link PlatformDependent} APIs.
+ */
+ public static final int DEFAULT_ALLOCATION_CUTOFF_VALUE = Integer.MAX_VALUE;
+
+ private static final PooledByteBufAllocatorL INNER_ALLOCATOR = new PooledByteBufAllocatorL();
+ static final UnsafeDirectLittleEndian EMPTY = INNER_ALLOCATOR.empty;
+ static final ArrowBuf EMPTY_BUFFER = new ArrowBuf(ReferenceManager.NO_OP,
+ null,
+ 0,
+ NettyAllocationManager.EMPTY.memoryAddress());
+ static final long CHUNK_SIZE = INNER_ALLOCATOR.getChunkSize();
+
+ private final long allocatedSize;
+ private final UnsafeDirectLittleEndian memoryChunk;
+ private final long allocatedAddress;
+
+ /**
+ * The cut-off value for switching allocation strategies.
+ */
+ private final int allocationCutOffValue;
+
+ NettyAllocationManager(BufferAllocator accountingAllocator, long requestedSize, int allocationCutOffValue) {
+ super(accountingAllocator);
+ this.allocationCutOffValue = allocationCutOffValue;
+
+ if (requestedSize > allocationCutOffValue) {
+ this.memoryChunk = null;
+ this.allocatedAddress = PlatformDependent.allocateMemory(requestedSize);
+ this.allocatedSize = requestedSize;
+ } else {
+ this.memoryChunk = INNER_ALLOCATOR.allocate(requestedSize);
+ this.allocatedAddress = memoryChunk.memoryAddress();
+ this.allocatedSize = memoryChunk.capacity();
+ }
+ }
+
+ NettyAllocationManager(BufferAllocator accountingAllocator, long requestedSize) {
+ this(accountingAllocator, requestedSize, DEFAULT_ALLOCATION_CUTOFF_VALUE);
+ }
+
+ /**
+ * Get the underlying memory chunk managed by this AllocationManager.
+ * @return the underlying memory chunk if the request size is not greater than the
+ * {@link NettyAllocationManager#allocationCutOffValue}, or null otherwise.
+ *
+ * @deprecated this method will be removed in a future release.
+ */
+ @Deprecated
+ UnsafeDirectLittleEndian getMemoryChunk() {
+ return memoryChunk;
+ }
+
+ @Override
+ protected long memoryAddress() {
+ return allocatedAddress;
+ }
+
+ @Override
+ protected void release0() {
+ if (memoryChunk == null) {
+ PlatformDependent.freeMemory(allocatedAddress);
+ } else {
+ memoryChunk.release();
+ }
+ }
+
+ /**
+ * Returns the underlying memory chunk size managed.
+ *
+ * <p>NettyAllocationManager rounds requested size up to the next power of two.
+ */
+ @Override
+ public long getSize() {
+ return allocatedSize;
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-netty/src/test/java/io/netty/buffer/TestNettyArrowBuf.java b/src/arrow/java/memory/memory-netty/src/test/java/io/netty/buffer/TestNettyArrowBuf.java
new file mode 100644
index 000000000..916cf82e7
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/test/java/io/netty/buffer/TestNettyArrowBuf.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.netty.buffer;
+
+import java.nio.ByteBuffer;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.ArrowByteBufAllocator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestNettyArrowBuf {
+
+ @Test
+ public void testSliceWithoutArgs() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf nettyBuf = NettyArrowBuf.unwrapBuffer(buf);
+ nettyBuf.writerIndex(20);
+ nettyBuf.readerIndex(10);
+ NettyArrowBuf slicedBuffer = nettyBuf.slice();
+ int readableBytes = slicedBuffer.readableBytes();
+ Assert.assertEquals(10, readableBytes);
+ }
+ }
+
+ @Test
+ public void testNioBuffer() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf nettyBuf = NettyArrowBuf.unwrapBuffer(buf);
+ ByteBuffer byteBuffer = nettyBuf.nioBuffer(4, 6);
+ // Nio Buffers should always be 0 indexed
+ Assert.assertEquals(0, byteBuffer.position());
+ Assert.assertEquals(6, byteBuffer.limit());
+ // Underlying buffer has size 32 excluding 4 should have capacity of 28.
+ Assert.assertEquals(28, byteBuffer.capacity());
+
+ }
+ }
+
+ @Test
+ public void testInternalNioBuffer() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf nettyBuf = NettyArrowBuf.unwrapBuffer(buf);
+ ByteBuffer byteBuffer = nettyBuf.internalNioBuffer(4, 6);
+ Assert.assertEquals(0, byteBuffer.position());
+ Assert.assertEquals(6, byteBuffer.limit());
+ // Underlying buffer has size 32 excluding 4 should have capacity of 28.
+ Assert.assertEquals(28, byteBuffer.capacity());
+
+ }
+ }
+
+ @Test
+ public void testSetLEValues() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf nettyBuf = NettyArrowBuf.unwrapBuffer(buf);
+ int [] intVals = new int[] {Integer.MIN_VALUE, Short.MIN_VALUE - 1, Short.MIN_VALUE, 0 ,
+ Short.MAX_VALUE , Short.MAX_VALUE + 1, Integer.MAX_VALUE};
+ for (int intValue :intVals ) {
+ nettyBuf._setInt(0, intValue);
+ Assert.assertEquals(nettyBuf._getIntLE(0), Integer.reverseBytes(intValue));
+ }
+
+ long [] longVals = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE};
+ for (long longValue :longVals ) {
+ nettyBuf._setLong(0, longValue);
+ Assert.assertEquals(nettyBuf._getLongLE(0), Long.reverseBytes(longValue));
+ }
+
+ short [] shortVals = new short[] {Short.MIN_VALUE, 0 , Short.MAX_VALUE};
+ for (short shortValue :shortVals ) {
+ nettyBuf._setShort(0, shortValue);
+ Assert.assertEquals(nettyBuf._getShortLE(0), Short.reverseBytes(shortValue));
+ }
+ }
+ }
+
+ @Test
+ public void testSetCompositeBuffer() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ NettyArrowBuf buf2 = NettyArrowBuf.unwrapBuffer(allocator.buffer(20));
+ ) {
+ CompositeByteBuf byteBufs = new CompositeByteBuf(new ArrowByteBufAllocator(allocator),
+ true, 1);
+ int expected = 4;
+ buf2.setInt(0, expected);
+ buf2.writerIndex(4);
+ byteBufs.addComponent(true, buf2);
+ NettyArrowBuf.unwrapBuffer(buf).setBytes(0, byteBufs, 4);
+ int actual = buf.getInt(0);
+ Assert.assertEquals(expected, actual);
+ }
+ }
+
+ @Test
+ public void testGetCompositeBuffer() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ CompositeByteBuf byteBufs = new CompositeByteBuf(new ArrowByteBufAllocator(allocator),
+ true, 1);
+ int expected = 4;
+ buf.setInt(0, expected);
+ NettyArrowBuf buf2 = NettyArrowBuf.unwrapBuffer(allocator.buffer(20));
+ // composite buffers are a bit weird, need to jump hoops
+ // to set capacity.
+ byteBufs.addComponent(true, buf2);
+ byteBufs.capacity(20);
+ NettyArrowBuf.unwrapBuffer(buf).getBytes(0, byteBufs, 4);
+ int actual = byteBufs.getInt(0);
+ Assert.assertEquals(expected, actual);
+ byteBufs.component(0).release();
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java b/src/arrow/java/memory/memory-netty/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java
new file mode 100644
index 000000000..c2bd95bb3
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.netty.buffer;
+
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Test;
+
+public class TestUnsafeDirectLittleEndian {
+ private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+
+ @Test
+ public void testPrimitiveGetSet() {
+ ByteBuf byteBuf = Unpooled.directBuffer(64);
+ UnsafeDirectLittleEndian unsafeDirect = new UnsafeDirectLittleEndian(new LargeBuffer(byteBuf));
+
+ unsafeDirect.setByte(0, Byte.MAX_VALUE);
+ unsafeDirect.setByte(1, -1); // 0xFF
+ unsafeDirect.setShort(2, Short.MAX_VALUE);
+ unsafeDirect.setShort(4, -2); // 0xFFFE
+ unsafeDirect.setInt(8, Integer.MAX_VALUE);
+ unsafeDirect.setInt(12, -66052); // 0xFFFE FDFC
+ unsafeDirect.setLong(16, Long.MAX_VALUE);
+ unsafeDirect.setLong(24, -4295098372L); // 0xFFFF FFFE FFFD FFFC
+ unsafeDirect.setFloat(32, 1.23F);
+ unsafeDirect.setFloat(36, -1.23F);
+ unsafeDirect.setDouble(40, 1.234567D);
+ unsafeDirect.setDouble(48, -1.234567D);
+
+ assertEquals(Byte.MAX_VALUE, unsafeDirect.getByte(0));
+ assertEquals(-1, unsafeDirect.getByte(1));
+ assertEquals(Short.MAX_VALUE, unsafeDirect.getShort(2));
+ assertEquals(-2, unsafeDirect.getShort(4));
+ assertEquals((char) 65534, unsafeDirect.getChar(4));
+ assertEquals(Integer.MAX_VALUE, unsafeDirect.getInt(8));
+ assertEquals(-66052, unsafeDirect.getInt(12));
+ assertEquals(4294901244L, unsafeDirect.getUnsignedInt(12));
+ assertEquals(Long.MAX_VALUE, unsafeDirect.getLong(16));
+ assertEquals(-4295098372L, unsafeDirect.getLong(24));
+ assertEquals(1.23F, unsafeDirect.getFloat(32), 0.0);
+ assertEquals(-1.23F, unsafeDirect.getFloat(36), 0.0);
+ assertEquals(1.234567D, unsafeDirect.getDouble(40), 0.0);
+ assertEquals(-1.234567D, unsafeDirect.getDouble(48), 0.0);
+
+ byte[] inBytes = "1234567".getBytes(StandardCharsets.UTF_8);
+ try (ByteArrayInputStream bais = new ByteArrayInputStream(inBytes);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+ assertEquals(5, unsafeDirect.setBytes(56, bais, 5));
+ unsafeDirect.getBytes(56, baos, 5);
+ assertEquals("12345", new String(baos.toByteArray(), StandardCharsets.UTF_8));
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/ITTestLargeArrowBuf.java b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/ITTestLargeArrowBuf.java
new file mode 100644
index 000000000..fa8d510e3
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/ITTestLargeArrowBuf.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Integration test for large (more than 2GB) {@link org.apache.arrow.memory.ArrowBuf}.
+ * To run this test, please make sure there is at least 4GB memory in the system.
+ */
+public class ITTestLargeArrowBuf {
+ private static final Logger logger = LoggerFactory.getLogger(ITTestLargeArrowBuf.class);
+
+ private void run(long bufSize) {
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ ArrowBuf largeBuf = allocator.buffer(bufSize)) {
+ assertEquals(bufSize, largeBuf.capacity());
+ logger.trace("Successfully allocated a buffer with capacity {}", largeBuf.capacity());
+
+ for (long i = 0; i < bufSize / 8; i++) {
+ largeBuf.setLong(i * 8, i);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written {} long words", i + 1);
+ }
+ }
+ logger.trace("Successfully written {} long words", bufSize / 8);
+
+ for (long i = 0; i < bufSize / 8; i++) {
+ long val = largeBuf.getLong(i * 8);
+ assertEquals(i, val);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read {} long words", i + 1);
+ }
+ }
+ logger.trace("Successfully read {} long words", bufSize / 8);
+ }
+ logger.trace("Successfully released the large buffer.");
+ }
+
+ @Test
+ public void testLargeArrowBuf() {
+ run(4 * 1024 * 1024 * 1024L);
+ }
+
+ @Test
+ public void testMaxIntArrowBuf() {
+ run(Integer.MAX_VALUE);
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestAllocationManagerNetty.java b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestAllocationManagerNetty.java
new file mode 100644
index 000000000..2dbd56480
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestAllocationManagerNetty.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+/**
+ * Test cases for {@link AllocationManager}.
+ */
+public class TestAllocationManagerNetty {
+
+ @Test
+ public void testAllocationManagerType() {
+ // test netty allocation manager type
+ System.setProperty(
+ DefaultAllocationManagerOption.ALLOCATION_MANAGER_TYPE_PROPERTY_NAME, "Netty");
+ DefaultAllocationManagerOption.AllocationManagerType mgrType =
+ DefaultAllocationManagerOption.getDefaultAllocationManagerType();
+
+ assertEquals(DefaultAllocationManagerOption.AllocationManagerType.Netty, mgrType);
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java
new file mode 100644
index 000000000..ef49e4178
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java
@@ -0,0 +1,1183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.lang.reflect.Field;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.arrow.memory.AllocationOutcomeDetails.Entry;
+import org.apache.arrow.memory.rounding.RoundingPolicy;
+import org.apache.arrow.memory.rounding.SegmentRoundingPolicy;
+import org.apache.arrow.memory.util.AssertionUtil;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+
+import sun.misc.Unsafe;
+
+public class TestBaseAllocator {
+
+ private static final int MAX_ALLOCATION = 8 * 1024;
+
+ /*
+ // ---------------------------------------- DEBUG -----------------------------------
+
+ @After
+ public void checkBuffers() {
+ final int bufferCount = UnsafeDirectLittleEndian.getBufferCount();
+ if (bufferCount != 0) {
+ UnsafeDirectLittleEndian.logBuffers(logger);
+ UnsafeDirectLittleEndian.releaseBuffers();
+ }
+
+ assertEquals(0, bufferCount);
+ }
+
+ // @AfterClass
+ // public static void dumpBuffers() {
+ // UnsafeDirectLittleEndian.logBuffers(logger);
+ // }
+
+ // ---------------------------------------- DEBUG ------------------------------------
+ */
+
+
+ @Test
+ public void test_privateMax() throws Exception {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ final ArrowBuf arrowBuf1 = rootAllocator.buffer(MAX_ALLOCATION / 2);
+ assertNotNull("allocation failed", arrowBuf1);
+
+ try (final BufferAllocator childAllocator =
+ rootAllocator.newChildAllocator("noLimits", 0, MAX_ALLOCATION)) {
+ final ArrowBuf arrowBuf2 = childAllocator.buffer(MAX_ALLOCATION / 2);
+ assertNotNull("allocation failed", arrowBuf2);
+ arrowBuf2.getReferenceManager().release();
+ }
+
+ arrowBuf1.getReferenceManager().release();
+ }
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void testRootAllocator_closeWithOutstanding() throws Exception {
+ try {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ final ArrowBuf arrowBuf = rootAllocator.buffer(512);
+ assertNotNull("allocation failed", arrowBuf);
+ }
+ } finally {
+ /*
+ * We expect there to be one unreleased underlying buffer because we're closing
+ * without releasing it.
+ */
+ /*
+ // ------------------------------- DEBUG ---------------------------------
+ final int bufferCount = UnsafeDirectLittleEndian.getBufferCount();
+ UnsafeDirectLittleEndian.releaseBuffers();
+ assertEquals(1, bufferCount);
+ // ------------------------------- DEBUG ---------------------------------
+ */
+ }
+ }
+
+ @Test
+ public void testRootAllocator_getEmpty() throws Exception {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ final ArrowBuf arrowBuf = rootAllocator.buffer(0);
+ assertNotNull("allocation failed", arrowBuf);
+ assertEquals("capacity was non-zero", 0, arrowBuf.capacity());
+ assertTrue("address should be valid", arrowBuf.memoryAddress() != 0);
+ arrowBuf.getReferenceManager().release();
+ }
+ }
+
+ @Ignore // TODO(DRILL-2740)
+ @Test(expected = IllegalStateException.class)
+ public void testAllocator_unreleasedEmpty() throws Exception {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ @SuppressWarnings("unused")
+ final ArrowBuf arrowBuf = rootAllocator.buffer(0);
+ }
+ }
+
+ @Test
+ public void testAllocator_transferOwnership() throws Exception {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ final BufferAllocator childAllocator1 =
+ rootAllocator.newChildAllocator("changeOwnership1", 0, MAX_ALLOCATION);
+ final BufferAllocator childAllocator2 =
+ rootAllocator.newChildAllocator("changeOwnership2", 0, MAX_ALLOCATION);
+
+ final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 4);
+ rootAllocator.verify();
+ final ReferenceManager referenceManager = arrowBuf1.getReferenceManager();
+ OwnershipTransferResult transferOwnership = referenceManager.transferOwnership(arrowBuf1, childAllocator2);
+ assertEquiv(arrowBuf1, transferOwnership.getTransferredBuffer());
+ final boolean allocationFit = transferOwnership.getAllocationFit();
+ rootAllocator.verify();
+ assertTrue(allocationFit);
+
+ arrowBuf1.getReferenceManager().release();
+ childAllocator1.close();
+ rootAllocator.verify();
+
+ transferOwnership.getTransferredBuffer().getReferenceManager().release();
+ childAllocator2.close();
+ }
+ }
+
+ static <T> boolean equalsIgnoreOrder(Collection<T> c1, Collection<T> c2) {
+ return (c1.size() == c2.size() && c1.containsAll(c2));
+ }
+
+ @Test
+ public void testAllocator_getParentAndChild() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ assertEquals(rootAllocator.getParentAllocator(), null);
+
+ try (final BufferAllocator childAllocator1 =
+ rootAllocator.newChildAllocator("child1", 0, MAX_ALLOCATION)) {
+ assertEquals(childAllocator1.getParentAllocator(), rootAllocator);
+ assertTrue(
+ equalsIgnoreOrder(Arrays.asList(childAllocator1), rootAllocator.getChildAllocators()));
+
+ try (final BufferAllocator childAllocator2 =
+ rootAllocator.newChildAllocator("child2", 0, MAX_ALLOCATION)) {
+ assertEquals(childAllocator2.getParentAllocator(), rootAllocator);
+ assertTrue(equalsIgnoreOrder(Arrays.asList(childAllocator1, childAllocator2),
+ rootAllocator.getChildAllocators()));
+
+ try (final BufferAllocator grandChildAllocator =
+ childAllocator1.newChildAllocator("grand-child", 0, MAX_ALLOCATION)) {
+ assertEquals(grandChildAllocator.getParentAllocator(), childAllocator1);
+ assertTrue(equalsIgnoreOrder(Arrays.asList(grandChildAllocator),
+ childAllocator1.getChildAllocators()));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testAllocator_childRemovedOnClose() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator1 =
+ rootAllocator.newChildAllocator("child1", 0, MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator2 =
+ rootAllocator.newChildAllocator("child2", 0, MAX_ALLOCATION)) {
+
+ // root has two child allocators
+ assertTrue(equalsIgnoreOrder(Arrays.asList(childAllocator1, childAllocator2),
+ rootAllocator.getChildAllocators()));
+
+ try (final BufferAllocator grandChildAllocator =
+ childAllocator1.newChildAllocator("grand-child", 0, MAX_ALLOCATION)) {
+
+ // child1 has one allocator i.e grand-child
+ assertTrue(equalsIgnoreOrder(Arrays.asList(grandChildAllocator),
+ childAllocator1.getChildAllocators()));
+ }
+
+ // grand-child closed
+ assertTrue(
+ equalsIgnoreOrder(Collections.EMPTY_SET, childAllocator1.getChildAllocators()));
+ }
+ // root has only one child left
+ assertTrue(
+ equalsIgnoreOrder(Arrays.asList(childAllocator1), rootAllocator.getChildAllocators()));
+ }
+ // all child allocators closed.
+ assertTrue(equalsIgnoreOrder(Collections.EMPTY_SET, rootAllocator.getChildAllocators()));
+ }
+ }
+
+ @Test
+ public void testAllocator_shareOwnership() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ final BufferAllocator childAllocator1 = rootAllocator.newChildAllocator("shareOwnership1", 0,
+ MAX_ALLOCATION);
+ final BufferAllocator childAllocator2 = rootAllocator.newChildAllocator("shareOwnership2", 0,
+ MAX_ALLOCATION);
+ final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 4);
+ rootAllocator.verify();
+
+ // share ownership of buffer.
+ final ArrowBuf arrowBuf2 = arrowBuf1.getReferenceManager().retain(arrowBuf1, childAllocator2);
+ rootAllocator.verify();
+ assertNotNull(arrowBuf2);
+ assertNotEquals(arrowBuf2, arrowBuf1);
+ assertEquiv(arrowBuf1, arrowBuf2);
+
+ // release original buffer (thus transferring ownership to allocator 2. (should leave
+ // allocator 1 in empty state)
+ arrowBuf1.getReferenceManager().release();
+ rootAllocator.verify();
+ childAllocator1.close();
+ rootAllocator.verify();
+
+ final BufferAllocator childAllocator3 = rootAllocator.newChildAllocator("shareOwnership3", 0,
+ MAX_ALLOCATION);
+ final ArrowBuf arrowBuf3 = arrowBuf1.getReferenceManager().retain(arrowBuf1, childAllocator3);
+ assertNotNull(arrowBuf3);
+ assertNotEquals(arrowBuf3, arrowBuf1);
+ assertNotEquals(arrowBuf3, arrowBuf2);
+ assertEquiv(arrowBuf1, arrowBuf3);
+ rootAllocator.verify();
+
+ arrowBuf2.getReferenceManager().release();
+ rootAllocator.verify();
+ childAllocator2.close();
+ rootAllocator.verify();
+
+ arrowBuf3.getReferenceManager().release();
+ rootAllocator.verify();
+ childAllocator3.close();
+ }
+ }
+
+ @Test
+ public void testRootAllocator_createChildAndUse() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator = rootAllocator.newChildAllocator(
+ "createChildAndUse", 0, MAX_ALLOCATION)) {
+ final ArrowBuf arrowBuf = childAllocator.buffer(512);
+ assertNotNull("allocation failed", arrowBuf);
+ arrowBuf.getReferenceManager().release();
+ }
+ }
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void testRootAllocator_createChildDontClose() throws Exception {
+ try {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ final BufferAllocator childAllocator = rootAllocator.newChildAllocator(
+ "createChildDontClose", 0, MAX_ALLOCATION);
+ final ArrowBuf arrowBuf = childAllocator.buffer(512);
+ assertNotNull("allocation failed", arrowBuf);
+ }
+ } finally {
+ /*
+ * We expect one underlying buffer because we closed a child allocator without
+ * releasing the buffer allocated from it.
+ */
+ /*
+ // ------------------------------- DEBUG ---------------------------------
+ final int bufferCount = UnsafeDirectLittleEndian.getBufferCount();
+ UnsafeDirectLittleEndian.releaseBuffers();
+ assertEquals(1, bufferCount);
+ // ------------------------------- DEBUG ---------------------------------
+ */
+ }
+ }
+
+ @Test
+ public void testSegmentAllocator() {
+ RoundingPolicy policy = new SegmentRoundingPolicy(1024);
+ try (RootAllocator allocator = new RootAllocator(AllocationListener.NOOP, 1024 * 1024, policy)) {
+ ArrowBuf buf = allocator.buffer(798);
+ assertEquals(1024, buf.capacity());
+ buf.setInt(333, 959);
+ assertEquals(959, buf.getInt(333));
+ buf.close();
+
+ buf = allocator.buffer(1025);
+ assertEquals(2048, buf.capacity());
+ buf.setInt(193, 939);
+ assertEquals(939, buf.getInt(193));
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSegmentAllocator_childAllocator() {
+ RoundingPolicy policy = new SegmentRoundingPolicy(1024);
+ try (RootAllocator allocator = new RootAllocator(AllocationListener.NOOP, 1024 * 1024, policy);
+ BufferAllocator childAllocator = allocator.newChildAllocator("child", 0, 512 * 1024)) {
+
+ assertEquals("child", childAllocator.getName());
+
+ ArrowBuf buf = childAllocator.buffer(798);
+ assertEquals(1024, buf.capacity());
+ buf.setInt(333, 959);
+ assertEquals(959, buf.getInt(333));
+ buf.close();
+
+ buf = childAllocator.buffer(1025);
+ assertEquals(2048, buf.capacity());
+ buf.setInt(193, 939);
+ assertEquals(939, buf.getInt(193));
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSegmentAllocator_smallSegment() {
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> new SegmentRoundingPolicy(128)
+ );
+ assertEquals("The segment size cannot be smaller than 1024", e.getMessage());
+ }
+
+ @Test
+ public void testSegmentAllocator_segmentSizeNotPowerOf2() {
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> new SegmentRoundingPolicy(4097)
+ );
+ assertEquals("The segment size must be a power of 2", e.getMessage());
+ }
+
+ @Test
+ public void testCustomizedAllocationManager() {
+ try (BaseAllocator allocator = createAllocatorWithCustomizedAllocationManager()) {
+ final ArrowBuf arrowBuf1 = allocator.buffer(MAX_ALLOCATION);
+ assertNotNull("allocation failed", arrowBuf1);
+
+ arrowBuf1.setInt(0, 1);
+ assertEquals(1, arrowBuf1.getInt(0));
+
+ try {
+ final ArrowBuf arrowBuf2 = allocator.buffer(1);
+ fail("allocated memory beyond max allowed");
+ } catch (OutOfMemoryException e) {
+ // expected
+ }
+ arrowBuf1.getReferenceManager().release();
+
+ try {
+ arrowBuf1.getInt(0);
+ fail("data read from released buffer");
+ } catch (RuntimeException e) {
+ // expected
+ }
+ }
+ }
+
+ private BaseAllocator createAllocatorWithCustomizedAllocationManager() {
+ return new RootAllocator(BaseAllocator.configBuilder()
+ .maxAllocation(MAX_ALLOCATION)
+ .allocationManagerFactory(new AllocationManager.Factory() {
+ @Override
+ public AllocationManager create(BufferAllocator accountingAllocator, long requestedSize) {
+ return new AllocationManager(accountingAllocator) {
+ private final Unsafe unsafe = getUnsafe();
+ private final long address = unsafe.allocateMemory(requestedSize);
+
+ @Override
+ protected long memoryAddress() {
+ return address;
+ }
+
+ @Override
+ protected void release0() {
+ unsafe.setMemory(address, requestedSize, (byte) 0);
+ unsafe.freeMemory(address);
+ }
+
+ @Override
+ public long getSize() {
+ return requestedSize;
+ }
+
+ private Unsafe getUnsafe() {
+ Field f = null;
+ try {
+ f = Unsafe.class.getDeclaredField("theUnsafe");
+ f.setAccessible(true);
+ return (Unsafe) f.get(null);
+ } catch (NoSuchFieldException | IllegalAccessException e) {
+ throw new RuntimeException(e);
+ } finally {
+ if (f != null) {
+ f.setAccessible(false);
+ }
+ }
+ }
+ };
+ }
+
+ @Override
+ public ArrowBuf empty() {
+ return null;
+ }
+ }).build());
+ }
+
+ // Allocation listener
+ // It counts the number of times it has been invoked, and how much memory allocation it has seen
+ // When set to 'expand on fail', it attempts to expand the associated allocator's limit
+ private static final class TestAllocationListener implements AllocationListener {
+ private int numPreCalls;
+ private int numCalls;
+ private int numReleaseCalls;
+ private int numChildren;
+ private long totalMem;
+ private boolean expandOnFail;
+ BufferAllocator expandAlloc;
+ long expandLimit;
+
+ TestAllocationListener() {
+ this.numCalls = 0;
+ this.numChildren = 0;
+ this.totalMem = 0;
+ this.expandOnFail = false;
+ this.expandAlloc = null;
+ this.expandLimit = 0;
+ }
+
+ @Override
+ public void onPreAllocation(long size) {
+ numPreCalls++;
+ }
+
+ @Override
+ public void onAllocation(long size) {
+ numCalls++;
+ totalMem += size;
+ }
+
+ @Override
+ public boolean onFailedAllocation(long size, AllocationOutcome outcome) {
+ if (expandOnFail) {
+ expandAlloc.setLimit(expandLimit);
+ return true;
+ }
+ return false;
+ }
+
+
+ @Override
+ public void onRelease(long size) {
+ numReleaseCalls++;
+ }
+
+ @Override
+ public void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) {
+ ++numChildren;
+ }
+
+ @Override
+ public void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) {
+ --numChildren;
+ }
+
+ void setExpandOnFail(BufferAllocator expandAlloc, long expandLimit) {
+ this.expandOnFail = true;
+ this.expandAlloc = expandAlloc;
+ this.expandLimit = expandLimit;
+ }
+
+ int getNumPreCalls() {
+ return numPreCalls;
+ }
+
+ int getNumReleaseCalls() {
+ return numReleaseCalls;
+ }
+
+ int getNumCalls() {
+ return numCalls;
+ }
+
+ int getNumChildren() {
+ return numChildren;
+ }
+
+ long getTotalMem() {
+ return totalMem;
+ }
+ }
+
+ @Test
+ public void testRootAllocator_listeners() throws Exception {
+ TestAllocationListener l1 = new TestAllocationListener();
+ assertEquals(0, l1.getNumPreCalls());
+ assertEquals(0, l1.getNumCalls());
+ assertEquals(0, l1.getNumReleaseCalls());
+ assertEquals(0, l1.getNumChildren());
+ assertEquals(0, l1.getTotalMem());
+ TestAllocationListener l2 = new TestAllocationListener();
+ assertEquals(0, l2.getNumPreCalls());
+ assertEquals(0, l2.getNumCalls());
+ assertEquals(0, l2.getNumReleaseCalls());
+ assertEquals(0, l2.getNumChildren());
+ assertEquals(0, l2.getTotalMem());
+ // root and first-level child share the first listener
+ // second-level and third-level child share the second listener
+ try (final RootAllocator rootAllocator = new RootAllocator(l1, MAX_ALLOCATION)) {
+ try (final BufferAllocator c1 = rootAllocator.newChildAllocator("c1", 0, MAX_ALLOCATION)) {
+ assertEquals(1, l1.getNumChildren());
+ final ArrowBuf buf1 = c1.buffer(16);
+ assertNotNull("allocation failed", buf1);
+ assertEquals(1, l1.getNumPreCalls());
+ assertEquals(1, l1.getNumCalls());
+ assertEquals(0, l1.getNumReleaseCalls());
+ assertEquals(16, l1.getTotalMem());
+ buf1.getReferenceManager().release();
+ try (final BufferAllocator c2 = c1.newChildAllocator("c2", l2, 0, MAX_ALLOCATION)) {
+ assertEquals(2, l1.getNumChildren()); // c1 got a new child, so c1's listener (l1) is notified
+ assertEquals(0, l2.getNumChildren());
+ final ArrowBuf buf2 = c2.buffer(32);
+ assertNotNull("allocation failed", buf2);
+ assertEquals(1, l1.getNumCalls());
+ assertEquals(16, l1.getTotalMem());
+ assertEquals(1, l2.getNumPreCalls());
+ assertEquals(1, l2.getNumCalls());
+ assertEquals(0, l2.getNumReleaseCalls());
+ assertEquals(32, l2.getTotalMem());
+ buf2.getReferenceManager().release();
+ try (final BufferAllocator c3 = c2.newChildAllocator("c3", 0, MAX_ALLOCATION)) {
+ assertEquals(2, l1.getNumChildren());
+ assertEquals(1, l2.getNumChildren());
+ final ArrowBuf buf3 = c3.buffer(64);
+ assertNotNull("allocation failed", buf3);
+ assertEquals(1, l1.getNumPreCalls());
+ assertEquals(1, l1.getNumCalls());
+ assertEquals(1, l1.getNumReleaseCalls());
+ assertEquals(16, l1.getTotalMem());
+ assertEquals(2, l2.getNumPreCalls());
+ assertEquals(2, l2.getNumCalls());
+ assertEquals(1, l2.getNumReleaseCalls());
+ assertEquals(32 + 64, l2.getTotalMem());
+ buf3.getReferenceManager().release();
+ }
+ assertEquals(2, l1.getNumChildren());
+ assertEquals(0, l2.getNumChildren()); // third-level child removed
+ }
+ assertEquals(1, l1.getNumChildren()); // second-level child removed
+ assertEquals(0, l2.getNumChildren());
+ }
+ assertEquals(0, l1.getNumChildren()); // first-level child removed
+
+ assertEquals(2, l2.getNumReleaseCalls());
+ }
+ }
+
+ @Test
+ public void testRootAllocator_listenerAllocationFail() throws Exception {
+ TestAllocationListener l1 = new TestAllocationListener();
+ assertEquals(0, l1.getNumCalls());
+ assertEquals(0, l1.getTotalMem());
+ // Test attempts to allocate too much from a child whose limit is set to half of the max
+ // allocation. The listener's callback triggers, expanding the child allocator's limit, so then
+ // the allocation succeeds.
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator c1 = rootAllocator.newChildAllocator("c1", l1, 0,
+ MAX_ALLOCATION / 2)) {
+ try {
+ c1.buffer(MAX_ALLOCATION);
+ fail("allocated memory beyond max allowed");
+ } catch (OutOfMemoryException e) {
+ // expected
+ }
+ assertEquals(0, l1.getNumCalls());
+ assertEquals(0, l1.getTotalMem());
+
+ l1.setExpandOnFail(c1, MAX_ALLOCATION);
+ ArrowBuf arrowBuf = c1.buffer(MAX_ALLOCATION);
+ assertNotNull("allocation failed", arrowBuf);
+ assertEquals(1, l1.getNumCalls());
+ assertEquals(MAX_ALLOCATION, l1.getTotalMem());
+ arrowBuf.getReferenceManager().release();
+ }
+ }
+ }
+
+ private static void allocateAndFree(final BufferAllocator allocator) {
+ final ArrowBuf arrowBuf = allocator.buffer(512);
+ assertNotNull("allocation failed", arrowBuf);
+ arrowBuf.getReferenceManager().release();
+
+ final ArrowBuf arrowBuf2 = allocator.buffer(MAX_ALLOCATION);
+ assertNotNull("allocation failed", arrowBuf2);
+ arrowBuf2.getReferenceManager().release();
+
+ final int nBufs = 8;
+ final ArrowBuf[] arrowBufs = new ArrowBuf[nBufs];
+ for (int i = 0; i < arrowBufs.length; ++i) {
+ ArrowBuf arrowBufi = allocator.buffer(MAX_ALLOCATION / nBufs);
+ assertNotNull("allocation failed", arrowBufi);
+ arrowBufs[i] = arrowBufi;
+ }
+ for (ArrowBuf arrowBufi : arrowBufs) {
+ arrowBufi.getReferenceManager().release();
+ }
+ }
+
+ @Test
+ public void testAllocator_manyAllocations() throws Exception {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator =
+ rootAllocator.newChildAllocator("manyAllocations", 0, MAX_ALLOCATION)) {
+ allocateAndFree(childAllocator);
+ }
+ }
+ }
+
+ @Test
+ public void testAllocator_overAllocate() throws Exception {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator =
+ rootAllocator.newChildAllocator("overAllocate", 0, MAX_ALLOCATION)) {
+ allocateAndFree(childAllocator);
+
+ try {
+ childAllocator.buffer(MAX_ALLOCATION + 1);
+ fail("allocated memory beyond max allowed");
+ } catch (OutOfMemoryException e) {
+ // expected
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testAllocator_overAllocateParent() throws Exception {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator =
+ rootAllocator.newChildAllocator("overAllocateParent", 0, MAX_ALLOCATION)) {
+ final ArrowBuf arrowBuf1 = rootAllocator.buffer(MAX_ALLOCATION / 2);
+ assertNotNull("allocation failed", arrowBuf1);
+ final ArrowBuf arrowBuf2 = childAllocator.buffer(MAX_ALLOCATION / 2);
+ assertNotNull("allocation failed", arrowBuf2);
+
+ try {
+ childAllocator.buffer(MAX_ALLOCATION / 4);
+ fail("allocated memory beyond max allowed");
+ } catch (OutOfMemoryException e) {
+ // expected
+ }
+
+ arrowBuf1.getReferenceManager().release();
+ arrowBuf2.getReferenceManager().release();
+ }
+ }
+ }
+
+ @Test
+ public void testAllocator_failureAtParentLimitOutcomeDetails() throws Exception {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator =
+ rootAllocator.newChildAllocator("child", 0, MAX_ALLOCATION / 2)) {
+ try (final BufferAllocator grandChildAllocator =
+ childAllocator.newChildAllocator("grandchild", MAX_ALLOCATION / 4, MAX_ALLOCATION)) {
+ OutOfMemoryException e = assertThrows(OutOfMemoryException.class,
+ () -> grandChildAllocator.buffer(MAX_ALLOCATION));
+ // expected
+ assertTrue(e.getMessage().contains("Unable to allocate buffer"));
+
+ assertTrue("missing outcome details", e.getOutcomeDetails().isPresent());
+ AllocationOutcomeDetails outcomeDetails = e.getOutcomeDetails().get();
+
+ assertEquals(outcomeDetails.getFailedAllocator(), childAllocator);
+
+ // The order of allocators should be child to root (request propagates to parent if
+ // child cannot satisfy the request).
+ Iterator<Entry> iterator = outcomeDetails.allocEntries.iterator();
+ AllocationOutcomeDetails.Entry first = iterator.next();
+ assertEquals(MAX_ALLOCATION / 4, first.getAllocatedSize());
+ assertEquals(MAX_ALLOCATION, first.getRequestedSize());
+ assertEquals(false, first.isAllocationFailed());
+
+ AllocationOutcomeDetails.Entry second = iterator.next();
+ assertEquals(MAX_ALLOCATION - MAX_ALLOCATION / 4, second.getRequestedSize());
+ assertEquals(0, second.getAllocatedSize());
+ assertEquals(true, second.isAllocationFailed());
+
+ assertFalse(iterator.hasNext());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testAllocator_failureAtRootLimitOutcomeDetails() throws Exception {
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator =
+ rootAllocator.newChildAllocator("child", MAX_ALLOCATION / 2, Long.MAX_VALUE)) {
+ try (final BufferAllocator grandChildAllocator =
+ childAllocator.newChildAllocator("grandchild", MAX_ALLOCATION / 4, Long.MAX_VALUE)) {
+ OutOfMemoryException e = assertThrows(OutOfMemoryException.class,
+ () -> grandChildAllocator.buffer(MAX_ALLOCATION * 2));
+
+ assertTrue(e.getMessage().contains("Unable to allocate buffer"));
+ assertTrue("missing outcome details", e.getOutcomeDetails().isPresent());
+ AllocationOutcomeDetails outcomeDetails = e.getOutcomeDetails().get();
+
+ assertEquals(outcomeDetails.getFailedAllocator(), rootAllocator);
+
+ // The order of allocators should be child to root (request propagates to parent if
+ // child cannot satisfy the request).
+ Iterator<Entry> iterator = outcomeDetails.allocEntries.iterator();
+ AllocationOutcomeDetails.Entry first = iterator.next();
+ assertEquals(MAX_ALLOCATION / 4, first.getAllocatedSize());
+ assertEquals(2 * MAX_ALLOCATION, first.getRequestedSize());
+ assertEquals(false, first.isAllocationFailed());
+
+ AllocationOutcomeDetails.Entry second = iterator.next();
+ assertEquals(MAX_ALLOCATION / 4, second.getAllocatedSize());
+ assertEquals(2 * MAX_ALLOCATION - MAX_ALLOCATION / 4, second.getRequestedSize());
+ assertEquals(false, second.isAllocationFailed());
+
+ AllocationOutcomeDetails.Entry third = iterator.next();
+ assertEquals(0, third.getAllocatedSize());
+ assertEquals(true, third.isAllocationFailed());
+
+ assertFalse(iterator.hasNext());
+ }
+ }
+ }
+ }
+
+ private static void testAllocator_sliceUpBufferAndRelease(
+ final RootAllocator rootAllocator, final BufferAllocator bufferAllocator) {
+ final ArrowBuf arrowBuf1 = bufferAllocator.buffer(MAX_ALLOCATION / 2);
+ rootAllocator.verify();
+
+ final ArrowBuf arrowBuf2 = arrowBuf1.slice(16, arrowBuf1.capacity() - 32);
+ rootAllocator.verify();
+ final ArrowBuf arrowBuf3 = arrowBuf2.slice(16, arrowBuf2.capacity() - 32);
+ rootAllocator.verify();
+ @SuppressWarnings("unused")
+ final ArrowBuf arrowBuf4 = arrowBuf3.slice(16, arrowBuf3.capacity() - 32);
+ rootAllocator.verify();
+
+ arrowBuf3.getReferenceManager().release(); // since they share refcounts, one is enough to release them all
+ rootAllocator.verify();
+ }
+
+ @Test
+ public void testAllocator_createSlices() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ testAllocator_sliceUpBufferAndRelease(rootAllocator, rootAllocator);
+
+ try (final BufferAllocator childAllocator = rootAllocator.newChildAllocator("createSlices", 0,
+ MAX_ALLOCATION)) {
+ testAllocator_sliceUpBufferAndRelease(rootAllocator, childAllocator);
+ }
+ rootAllocator.verify();
+
+ testAllocator_sliceUpBufferAndRelease(rootAllocator, rootAllocator);
+
+ try (final BufferAllocator childAllocator = rootAllocator.newChildAllocator("createSlices", 0,
+ MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator2 =
+ childAllocator.newChildAllocator("createSlices", 0, MAX_ALLOCATION)) {
+ final ArrowBuf arrowBuf1 = childAllocator2.buffer(MAX_ALLOCATION / 8);
+ @SuppressWarnings("unused")
+ final ArrowBuf arrowBuf2 = arrowBuf1.slice(MAX_ALLOCATION / 16, MAX_ALLOCATION / 16);
+ testAllocator_sliceUpBufferAndRelease(rootAllocator, childAllocator);
+ arrowBuf1.getReferenceManager().release();
+ rootAllocator.verify();
+ }
+ rootAllocator.verify();
+
+ testAllocator_sliceUpBufferAndRelease(rootAllocator, childAllocator);
+ }
+ rootAllocator.verify();
+ }
+ }
+
+ @Test
+ public void testAllocator_sliceRanges() throws Exception {
+ // final AllocatorOwner allocatorOwner = new NamedOwner("sliceRanges");
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ // Populate a buffer with byte values corresponding to their indices.
+ final ArrowBuf arrowBuf = rootAllocator.buffer(256);
+ assertEquals(256, arrowBuf.capacity());
+ assertEquals(0, arrowBuf.readerIndex());
+ assertEquals(0, arrowBuf.readableBytes());
+ assertEquals(0, arrowBuf.writerIndex());
+ assertEquals(256, arrowBuf.writableBytes());
+
+ final ArrowBuf slice3 = arrowBuf.slice();
+ assertEquals(0, slice3.readerIndex());
+ assertEquals(0, slice3.readableBytes());
+ assertEquals(0, slice3.writerIndex());
+ // assertEquals(256, slice3.capacity());
+ // assertEquals(256, slice3.writableBytes());
+
+ for (int i = 0; i < 256; ++i) {
+ arrowBuf.writeByte(i);
+ }
+ assertEquals(0, arrowBuf.readerIndex());
+ assertEquals(256, arrowBuf.readableBytes());
+ assertEquals(256, arrowBuf.writerIndex());
+ assertEquals(0, arrowBuf.writableBytes());
+
+ final ArrowBuf slice1 = arrowBuf.slice();
+ assertEquals(0, slice1.readerIndex());
+ assertEquals(256, slice1.readableBytes());
+ for (int i = 0; i < 10; ++i) {
+ assertEquals(i, slice1.readByte());
+ }
+ assertEquals(256 - 10, slice1.readableBytes());
+ for (int i = 0; i < 256; ++i) {
+ assertEquals((byte) i, slice1.getByte(i));
+ }
+
+ final ArrowBuf slice2 = arrowBuf.slice(25, 25);
+ assertEquals(0, slice2.readerIndex());
+ assertEquals(25, slice2.readableBytes());
+ for (int i = 25; i < 50; ++i) {
+ assertEquals(i, slice2.readByte());
+ }
+
+ /*
+ for(int i = 256; i > 0; --i) {
+ slice3.writeByte(i - 1);
+ }
+ for(int i = 0; i < 256; ++i) {
+ assertEquals(255 - i, slice1.getByte(i));
+ }
+ */
+
+ arrowBuf.getReferenceManager().release(); // all the derived buffers share this fate
+ }
+ }
+
+ @Test
+ public void testAllocator_slicesOfSlices() throws Exception {
+ // final AllocatorOwner allocatorOwner = new NamedOwner("slicesOfSlices");
+ try (final RootAllocator rootAllocator =
+ new RootAllocator(MAX_ALLOCATION)) {
+ // Populate a buffer with byte values corresponding to their indices.
+ final ArrowBuf arrowBuf = rootAllocator.buffer(256);
+ for (int i = 0; i < 256; ++i) {
+ arrowBuf.writeByte(i);
+ }
+
+ // Slice it up.
+ final ArrowBuf slice0 = arrowBuf.slice(0, arrowBuf.capacity());
+ for (int i = 0; i < 256; ++i) {
+ assertEquals((byte) i, arrowBuf.getByte(i));
+ }
+
+ final ArrowBuf slice10 = slice0.slice(10, arrowBuf.capacity() - 10);
+ for (int i = 10; i < 256; ++i) {
+ assertEquals((byte) i, slice10.getByte(i - 10));
+ }
+
+ final ArrowBuf slice20 = slice10.slice(10, arrowBuf.capacity() - 20);
+ for (int i = 20; i < 256; ++i) {
+ assertEquals((byte) i, slice20.getByte(i - 20));
+ }
+
+ final ArrowBuf slice30 = slice20.slice(10, arrowBuf.capacity() - 30);
+ for (int i = 30; i < 256; ++i) {
+ assertEquals((byte) i, slice30.getByte(i - 30));
+ }
+
+ arrowBuf.getReferenceManager().release();
+ }
+ }
+
+ @Test
+ public void testAllocator_transferSliced() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ final BufferAllocator childAllocator1 = rootAllocator.newChildAllocator("transferSliced1", 0, MAX_ALLOCATION);
+ final BufferAllocator childAllocator2 = rootAllocator.newChildAllocator("transferSliced2", 0, MAX_ALLOCATION);
+
+ final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 8);
+ final ArrowBuf arrowBuf2 = childAllocator2.buffer(MAX_ALLOCATION / 8);
+
+ final ArrowBuf arrowBuf1s = arrowBuf1.slice(0, arrowBuf1.capacity() / 2);
+ final ArrowBuf arrowBuf2s = arrowBuf2.slice(0, arrowBuf2.capacity() / 2);
+
+ rootAllocator.verify();
+
+ OwnershipTransferResult result1 = arrowBuf2s.getReferenceManager().transferOwnership(arrowBuf2s, childAllocator1);
+ assertEquiv(arrowBuf2s, result1.getTransferredBuffer());
+ rootAllocator.verify();
+ OwnershipTransferResult result2 = arrowBuf1s.getReferenceManager().transferOwnership(arrowBuf1s, childAllocator2);
+ assertEquiv(arrowBuf1s, result2.getTransferredBuffer());
+ rootAllocator.verify();
+
+ result1.getTransferredBuffer().getReferenceManager().release();
+ result2.getTransferredBuffer().getReferenceManager().release();
+
+ arrowBuf1s.getReferenceManager().release(); // releases arrowBuf1
+ arrowBuf2s.getReferenceManager().release(); // releases arrowBuf2
+
+ childAllocator1.close();
+ childAllocator2.close();
+ }
+ }
+
+ @Test
+ public void testAllocator_shareSliced() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ final BufferAllocator childAllocator1 = rootAllocator.newChildAllocator("transferSliced", 0, MAX_ALLOCATION);
+ final BufferAllocator childAllocator2 = rootAllocator.newChildAllocator("transferSliced", 0, MAX_ALLOCATION);
+
+ final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 8);
+ final ArrowBuf arrowBuf2 = childAllocator2.buffer(MAX_ALLOCATION / 8);
+
+ final ArrowBuf arrowBuf1s = arrowBuf1.slice(0, arrowBuf1.capacity() / 2);
+ final ArrowBuf arrowBuf2s = arrowBuf2.slice(0, arrowBuf2.capacity() / 2);
+
+ rootAllocator.verify();
+
+ final ArrowBuf arrowBuf2s1 = arrowBuf2s.getReferenceManager().retain(arrowBuf2s, childAllocator1);
+ assertEquiv(arrowBuf2s, arrowBuf2s1);
+ final ArrowBuf arrowBuf1s2 = arrowBuf1s.getReferenceManager().retain(arrowBuf1s, childAllocator2);
+ assertEquiv(arrowBuf1s, arrowBuf1s2);
+ rootAllocator.verify();
+
+ arrowBuf1s.getReferenceManager().release(); // releases arrowBuf1
+ arrowBuf2s.getReferenceManager().release(); // releases arrowBuf2
+ rootAllocator.verify();
+
+ arrowBuf2s1.getReferenceManager().release(); // releases the shared arrowBuf2 slice
+ arrowBuf1s2.getReferenceManager().release(); // releases the shared arrowBuf1 slice
+
+ childAllocator1.close();
+ childAllocator2.close();
+ }
+ }
+
+ @Test
+ public void testAllocator_transferShared() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ final BufferAllocator childAllocator1 = rootAllocator.newChildAllocator("transferShared1", 0, MAX_ALLOCATION);
+ final BufferAllocator childAllocator2 = rootAllocator.newChildAllocator("transferShared2", 0, MAX_ALLOCATION);
+ final BufferAllocator childAllocator3 = rootAllocator.newChildAllocator("transferShared3", 0, MAX_ALLOCATION);
+
+ final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 8);
+
+ boolean allocationFit;
+
+ ArrowBuf arrowBuf2 = arrowBuf1.getReferenceManager().retain(arrowBuf1, childAllocator2);
+ rootAllocator.verify();
+ assertNotNull(arrowBuf2);
+ assertNotEquals(arrowBuf2, arrowBuf1);
+ assertEquiv(arrowBuf1, arrowBuf2);
+
+ final ReferenceManager refManager1 = arrowBuf1.getReferenceManager();
+ final OwnershipTransferResult result1 = refManager1.transferOwnership(arrowBuf1, childAllocator3);
+ allocationFit = result1.getAllocationFit();
+ final ArrowBuf arrowBuf3 = result1.getTransferredBuffer();
+ assertTrue(allocationFit);
+ assertEquiv(arrowBuf1, arrowBuf3);
+ rootAllocator.verify();
+
+ // Since childAllocator3 now has childAllocator1's buffer, 1, can close
+ arrowBuf1.getReferenceManager().release();
+ childAllocator1.close();
+ rootAllocator.verify();
+
+ arrowBuf2.getReferenceManager().release();
+ childAllocator2.close();
+ rootAllocator.verify();
+
+ final BufferAllocator childAllocator4 = rootAllocator.newChildAllocator("transferShared4", 0, MAX_ALLOCATION);
+ final ReferenceManager refManager3 = arrowBuf3.getReferenceManager();
+ final OwnershipTransferResult result3 = refManager3.transferOwnership(arrowBuf3, childAllocator4);
+ allocationFit = result3.getAllocationFit();
+ final ArrowBuf arrowBuf4 = result3.getTransferredBuffer();
+ assertTrue(allocationFit);
+ assertEquiv(arrowBuf3, arrowBuf4);
+ rootAllocator.verify();
+
+ arrowBuf3.getReferenceManager().release();
+ childAllocator3.close();
+ rootAllocator.verify();
+
+ arrowBuf4.getReferenceManager().release();
+ childAllocator4.close();
+ rootAllocator.verify();
+ }
+ }
+
+ @Test
+ public void testAllocator_unclaimedReservation() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator1 =
+ rootAllocator.newChildAllocator("unclaimedReservation", 0, MAX_ALLOCATION)) {
+ try (final AllocationReservation reservation = childAllocator1.newReservation()) {
+ assertTrue(reservation.add(64));
+ }
+ rootAllocator.verify();
+ }
+ }
+ }
+
+ @Test
+ public void testAllocator_claimedReservation() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+
+ try (final BufferAllocator childAllocator1 = rootAllocator.newChildAllocator(
+ "claimedReservation", 0, MAX_ALLOCATION)) {
+
+ try (final AllocationReservation reservation = childAllocator1.newReservation()) {
+ assertTrue(reservation.add(32));
+ assertTrue(reservation.add(32));
+
+ final ArrowBuf arrowBuf = reservation.allocateBuffer();
+ assertEquals(64, arrowBuf.capacity());
+ rootAllocator.verify();
+
+ arrowBuf.getReferenceManager().release();
+ rootAllocator.verify();
+ }
+ rootAllocator.verify();
+ }
+ }
+ }
+
+ @Test
+ public void testInitReservationAndLimit() throws Exception {
+ try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ try (final BufferAllocator childAllocator = rootAllocator.newChildAllocator(
+ "child", 2048, 4096)) {
+ assertEquals(2048, childAllocator.getInitReservation());
+ assertEquals(4096, childAllocator.getLimit());
+ }
+ }
+ }
+
+ @Test
+ public void multiple() throws Exception {
+ final String owner = "test";
+ try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
+
+ final int op = 100000;
+
+ BufferAllocator frag1 = allocator.newChildAllocator(owner, 1500000, Long.MAX_VALUE);
+ BufferAllocator frag2 = allocator.newChildAllocator(owner, 500000, Long.MAX_VALUE);
+
+ allocator.verify();
+
+ BufferAllocator allocator11 = frag1.newChildAllocator(owner, op, Long.MAX_VALUE);
+ ArrowBuf b11 = allocator11.buffer(1000000);
+
+ allocator.verify();
+
+ BufferAllocator allocator12 = frag1.newChildAllocator(owner, op, Long.MAX_VALUE);
+ ArrowBuf b12 = allocator12.buffer(500000);
+
+ allocator.verify();
+
+ BufferAllocator allocator21 = frag1.newChildAllocator(owner, op, Long.MAX_VALUE);
+
+ allocator.verify();
+
+ BufferAllocator allocator22 = frag2.newChildAllocator(owner, op, Long.MAX_VALUE);
+ ArrowBuf b22 = allocator22.buffer(2000000);
+
+ allocator.verify();
+
+ BufferAllocator frag3 = allocator.newChildAllocator(owner, 1000000, Long.MAX_VALUE);
+
+ allocator.verify();
+
+ BufferAllocator allocator31 = frag3.newChildAllocator(owner, op, Long.MAX_VALUE);
+ ArrowBuf b31a = allocator31.buffer(200000);
+
+ allocator.verify();
+
+ // Previously running operator completes
+ b22.getReferenceManager().release();
+
+ allocator.verify();
+
+ allocator22.close();
+
+ b31a.getReferenceManager().release();
+ allocator31.close();
+
+ b12.getReferenceManager().release();
+ allocator12.close();
+
+ allocator21.close();
+
+ b11.getReferenceManager().release();
+ allocator11.close();
+
+ frag1.close();
+ frag2.close();
+ frag3.close();
+
+ }
+ }
+
+ // This test needs to run in non-debug mode. So disabling the assertion status through class loader for this.
+ // The test passes if run individually with -Dtest=TestBaseAllocator#testMemoryLeakWithReservation
+ // but fails generally since the assertion status cannot be changed once the class is initialized.
+ // So setting the test to @ignore
+ @Test(expected = IllegalStateException.class)
+ @Ignore
+ public void testMemoryLeakWithReservation() throws Exception {
+ // disabling assertion status
+ AssertionUtil.class.getClassLoader().setClassAssertionStatus(AssertionUtil.class.getName(), false);
+ try (RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) {
+ ChildAllocator childAllocator1 = (ChildAllocator) rootAllocator.newChildAllocator(
+ "child1", 1024, MAX_ALLOCATION);
+ rootAllocator.verify();
+
+ ChildAllocator childAllocator2 = (ChildAllocator) childAllocator1.newChildAllocator(
+ "child2", 1024, MAX_ALLOCATION);
+ rootAllocator.verify();
+
+ ArrowBuf buff = childAllocator2.buffer(256);
+
+ Exception exception = assertThrows(IllegalStateException.class, () -> {
+ childAllocator2.close();
+ });
+ String exMessage = exception.getMessage();
+ assertTrue(exMessage.contains("Memory leaked: (256)"));
+
+ exception = assertThrows(IllegalStateException.class, () -> {
+ childAllocator1.close();
+ });
+ exMessage = exception.getMessage();
+ assertTrue(exMessage.contains("Memory leaked: (256)"));
+ }
+ }
+
+ public void assertEquiv(ArrowBuf origBuf, ArrowBuf newBuf) {
+ assertEquals(origBuf.readerIndex(), newBuf.readerIndex());
+ assertEquals(origBuf.writerIndex(), newBuf.writerIndex());
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestEmptyArrowBuf.java b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestEmptyArrowBuf.java
new file mode 100644
index 000000000..3fd7ce74a
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestEmptyArrowBuf.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import io.netty.buffer.PooledByteBufAllocatorL;
+
+public class TestEmptyArrowBuf {
+
+ private static final int MAX_ALLOCATION = 8 * 1024;
+ private static RootAllocator allocator;
+
+ @BeforeClass
+ public static void beforeClass() {
+ allocator = new RootAllocator(MAX_ALLOCATION);
+ }
+
+ /** Ensure the allocator is closed. */
+ @AfterClass
+ public static void afterClass() {
+ if (allocator != null) {
+ allocator.close();
+ }
+ }
+
+ @Test
+ public void testZeroBuf() {
+ // Exercise the historical log inside the empty ArrowBuf. This is initialized statically, and there is a circular
+ // dependency between ArrowBuf and BaseAllocator, so if the initialization happens in the wrong order, the
+ // historical log will be null even though BaseAllocator.DEBUG is true.
+ allocator.getEmpty().print(new StringBuilder(), 0, BaseAllocator.Verbosity.LOG_WITH_STACKTRACE);
+ }
+
+ @Test
+ public void testEmptyArrowBuf() {
+ ArrowBuf buf = new ArrowBuf(ReferenceManager.NO_OP, null,
+ 1024, new PooledByteBufAllocatorL().empty.memoryAddress());
+
+ buf.getReferenceManager().retain();
+ buf.getReferenceManager().retain(8);
+ assertEquals(1024, buf.capacity());
+ assertEquals(1, buf.getReferenceManager().getRefCount());
+ assertEquals(0, buf.getActualMemoryConsumed());
+
+ for (int i = 0; i < 10; i++) {
+ buf.setByte(i, i);
+ }
+ assertEquals(0, buf.getActualMemoryConsumed());
+ assertEquals(0, buf.getReferenceManager().getSize());
+ assertEquals(0, buf.getReferenceManager().getAccountedSize());
+ assertEquals(false, buf.getReferenceManager().release());
+ assertEquals(false, buf.getReferenceManager().release(2));
+ assertEquals(0, buf.getReferenceManager().getAllocator().getLimit());
+ assertEquals(buf, buf.getReferenceManager().transferOwnership(buf, allocator).getTransferredBuffer());
+ assertEquals(0, buf.readerIndex());
+ assertEquals(0, buf.writerIndex());
+ assertEquals(1, buf.refCnt());
+
+ ArrowBuf derive = buf.getReferenceManager().deriveBuffer(buf, 0, 100);
+ assertEquals(derive, buf);
+ assertEquals(1, buf.refCnt());
+ assertEquals(1, derive.refCnt());
+
+ buf.close();
+
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestEndianness.java b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestEndianness.java
new file mode 100644
index 000000000..dcaeb2488
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestEndianness.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteOrder;
+
+import org.junit.Test;
+
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.NettyArrowBuf;
+
+public class TestEndianness {
+
+ @Test
+ public void testNativeEndian() {
+ final BufferAllocator a = new RootAllocator(10000);
+ final ByteBuf b = NettyArrowBuf.unwrapBuffer(a.buffer(4));
+ b.setInt(0, 35);
+ if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) {
+ assertEquals(b.getByte(0), 35);
+ assertEquals(b.getByte(1), 0);
+ assertEquals(b.getByte(2), 0);
+ assertEquals(b.getByte(3), 0);
+ } else {
+ assertEquals(b.getByte(0), 0);
+ assertEquals(b.getByte(1), 0);
+ assertEquals(b.getByte(2), 0);
+ assertEquals(b.getByte(3), 35);
+ }
+ b.release();
+ a.close();
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestNettyAllocationManager.java b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestNettyAllocationManager.java
new file mode 100644
index 000000000..1b64cd733
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/TestNettyAllocationManager.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+/**
+ * Test cases for {@link NettyAllocationManager}.
+ */
+public class TestNettyAllocationManager {
+
+ static int CUSTOMIZED_ALLOCATION_CUTOFF_VALUE = 1024;
+
+ private BaseAllocator createCustomizedAllocator() {
+ return new RootAllocator(BaseAllocator.configBuilder()
+ .allocationManagerFactory(new AllocationManager.Factory() {
+ @Override
+ public AllocationManager create(BufferAllocator accountingAllocator, long size) {
+ return new NettyAllocationManager(accountingAllocator, size, CUSTOMIZED_ALLOCATION_CUTOFF_VALUE);
+ }
+
+ @Override
+ public ArrowBuf empty() {
+ return null;
+ }
+ }).build());
+ }
+
+ private void readWriteArrowBuf(ArrowBuf buffer) {
+ // write buffer
+ for (long i = 0; i < buffer.capacity() / 8; i++) {
+ buffer.setLong(i * 8, i);
+ }
+
+ // read buffer
+ for (long i = 0; i < buffer.capacity() / 8; i++) {
+ long val = buffer.getLong(i * 8);
+ assertEquals(i, val);
+ }
+ }
+
+ /**
+ * Test the allocation strategy for small buffers..
+ */
+ @Test
+ public void testSmallBufferAllocation() {
+ final long bufSize = CUSTOMIZED_ALLOCATION_CUTOFF_VALUE - 512L;
+ try (BaseAllocator allocator = createCustomizedAllocator();
+ ArrowBuf buffer = allocator.buffer(bufSize)) {
+
+ assertTrue(buffer.getReferenceManager() instanceof BufferLedger);
+ BufferLedger bufferLedger = (BufferLedger) buffer.getReferenceManager();
+
+ // make sure we are using netty allocation manager
+ AllocationManager allocMgr = bufferLedger.getAllocationManager();
+ assertTrue(allocMgr instanceof NettyAllocationManager);
+ NettyAllocationManager nettyMgr = (NettyAllocationManager) allocMgr;
+
+ // for the small buffer allocation strategy, the chunk is not null
+ assertNotNull(nettyMgr.getMemoryChunk());
+
+ readWriteArrowBuf(buffer);
+ }
+ }
+
+ /**
+ * Test the allocation strategy for large buffers..
+ */
+ @Test
+ public void testLargeBufferAllocation() {
+ final long bufSize = CUSTOMIZED_ALLOCATION_CUTOFF_VALUE + 1024L;
+ try (BaseAllocator allocator = createCustomizedAllocator();
+ ArrowBuf buffer = allocator.buffer(bufSize)) {
+ assertTrue(buffer.getReferenceManager() instanceof BufferLedger);
+ BufferLedger bufferLedger = (BufferLedger) buffer.getReferenceManager();
+
+ // make sure we are using netty allocation manager
+ AllocationManager allocMgr = bufferLedger.getAllocationManager();
+ assertTrue(allocMgr instanceof NettyAllocationManager);
+ NettyAllocationManager nettyMgr = (NettyAllocationManager) allocMgr;
+
+ // for the large buffer allocation strategy, the chunk is null
+ assertNull(nettyMgr.getMemoryChunk());
+
+ readWriteArrowBuf(buffer);
+ }
+ }
+}
diff --git a/src/arrow/java/memory/memory-netty/src/test/resources/logback.xml b/src/arrow/java/memory/memory-netty/src/test/resources/logback.xml
new file mode 100644
index 000000000..4c54d18a2
--- /dev/null
+++ b/src/arrow/java/memory/memory-netty/src/test/resources/logback.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<configuration>
+ <statusListener class="ch.qos.logback.core.status.NopStatusListener"/>
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <!-- encoders are assigned the type
+ ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+ </encoder>
+ </appender>
+
+ <logger name="org.apache.arrow" additivity="false">
+ <level value="info" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+</configuration>
diff --git a/src/arrow/java/memory/memory-unsafe/pom.xml b/src/arrow/java/memory/memory-unsafe/pom.xml
new file mode 100644
index 000000000..6358784d5
--- /dev/null
+++ b/src/arrow/java/memory/memory-unsafe/pom.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>arrow-memory</artifactId>
+ <groupId>org.apache.arrow</groupId>
+ <version>6.0.1</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>arrow-memory-unsafe</artifactId>
+ <name>Arrow Memory - Unsafe</name>
+ <description>Allocator and utils for allocating memory in Arrow based on sun.misc.Unsafe</description>
+
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+
+</project>
diff --git a/src/arrow/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java b/src/arrow/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java
new file mode 100644
index 000000000..720c3d02d
--- /dev/null
+++ b/src/arrow/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * The default Allocation Manager Factory for a module.
+ *
+ */
+public class DefaultAllocationManagerFactory implements AllocationManager.Factory {
+
+ public static final AllocationManager.Factory FACTORY = UnsafeAllocationManager.FACTORY;
+
+ @Override
+ public AllocationManager create(BufferAllocator accountingAllocator, long size) {
+ return FACTORY.create(accountingAllocator, size);
+ }
+
+ @Override
+ public ArrowBuf empty() {
+ return UnsafeAllocationManager.FACTORY.empty();
+ }
+}
diff --git a/src/arrow/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/UnsafeAllocationManager.java b/src/arrow/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/UnsafeAllocationManager.java
new file mode 100644
index 000000000..b10aba359
--- /dev/null
+++ b/src/arrow/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/UnsafeAllocationManager.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import org.apache.arrow.memory.util.MemoryUtil;
+
+/**
+ * Allocation manager based on unsafe API.
+ */
+public final class UnsafeAllocationManager extends AllocationManager {
+
+ private static final ArrowBuf EMPTY = new ArrowBuf(ReferenceManager.NO_OP,
+ null,
+ 0,
+ MemoryUtil.UNSAFE.allocateMemory(0)
+ );
+
+ public static final AllocationManager.Factory FACTORY = new Factory() {
+ @Override
+ public AllocationManager create(BufferAllocator accountingAllocator, long size) {
+ return new UnsafeAllocationManager(accountingAllocator, size);
+ }
+
+ @Override
+ public ArrowBuf empty() {
+ return EMPTY;
+ }
+ };
+
+ private final long allocatedSize;
+
+ private final long allocatedAddress;
+
+ UnsafeAllocationManager(BufferAllocator accountingAllocator, long requestedSize) {
+ super(accountingAllocator);
+ allocatedAddress = MemoryUtil.UNSAFE.allocateMemory(requestedSize);
+ allocatedSize = requestedSize;
+ }
+
+ @Override
+ public long getSize() {
+ return allocatedSize;
+ }
+
+ @Override
+ protected long memoryAddress() {
+ return allocatedAddress;
+ }
+
+ @Override
+ protected void release0() {
+ MemoryUtil.UNSAFE.freeMemory(allocatedAddress);
+ }
+
+}
diff --git a/src/arrow/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/TestAllocationManagerUnsafe.java b/src/arrow/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/TestAllocationManagerUnsafe.java
new file mode 100644
index 000000000..33abe92e5
--- /dev/null
+++ b/src/arrow/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/TestAllocationManagerUnsafe.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+/**
+ * Test cases for {@link AllocationManager}.
+ */
+public class TestAllocationManagerUnsafe {
+
+ @Test
+ public void testAllocationManagerType() {
+
+ // test unsafe allocation manager type
+ System.setProperty(
+ DefaultAllocationManagerOption.ALLOCATION_MANAGER_TYPE_PROPERTY_NAME, "Unsafe");
+ DefaultAllocationManagerOption.AllocationManagerType mgrType =
+ DefaultAllocationManagerOption.getDefaultAllocationManagerType();
+
+ assertEquals(DefaultAllocationManagerOption.AllocationManagerType.Unsafe, mgrType);
+
+ }
+}
diff --git a/src/arrow/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/TestUnsafeAllocationManager.java b/src/arrow/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/TestUnsafeAllocationManager.java
new file mode 100644
index 000000000..c15882a37
--- /dev/null
+++ b/src/arrow/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/TestUnsafeAllocationManager.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+/**
+ * Test cases for {@link UnsafeAllocationManager}.
+ */
+public class TestUnsafeAllocationManager {
+
+ private BaseAllocator createUnsafeAllocator() {
+ return new RootAllocator(BaseAllocator.configBuilder().allocationManagerFactory(UnsafeAllocationManager.FACTORY)
+ .build());
+ }
+
+ private void readWriteArrowBuf(ArrowBuf buffer) {
+ // write buffer
+ for (long i = 0; i < buffer.capacity() / 8; i++) {
+ buffer.setLong(i * 8, i);
+ }
+
+ // read buffer
+ for (long i = 0; i < buffer.capacity() / 8; i++) {
+ long val = buffer.getLong(i * 8);
+ assertEquals(i, val);
+ }
+ }
+
+ /**
+ * Test the memory allocation for {@link UnsafeAllocationManager}.
+ */
+ @Test
+ public void testBufferAllocation() {
+ final long bufSize = 4096L;
+ try (BaseAllocator allocator = createUnsafeAllocator();
+ ArrowBuf buffer = allocator.buffer(bufSize)) {
+ assertTrue(buffer.getReferenceManager() instanceof BufferLedger);
+ BufferLedger bufferLedger = (BufferLedger) buffer.getReferenceManager();
+
+ // make sure we are using unsafe allocation manager
+ AllocationManager allocMgr = bufferLedger.getAllocationManager();
+ assertTrue(allocMgr instanceof UnsafeAllocationManager);
+ UnsafeAllocationManager unsafeMgr = (UnsafeAllocationManager) allocMgr;
+
+ assertEquals(bufSize, unsafeMgr.getSize());
+ readWriteArrowBuf(buffer);
+ }
+ }
+}
diff --git a/src/arrow/java/memory/pom.xml b/src/arrow/java/memory/pom.xml
new file mode 100644
index 000000000..7f2e642f0
--- /dev/null
+++ b/src/arrow/java/memory/pom.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ </parent>
+ <artifactId>arrow-memory</artifactId>
+ <name>Arrow Memory</name>
+ <packaging>pom</packaging>
+
+ <modules>
+ <module>memory-core</module>
+ <module>memory-unsafe</module>
+ <module>memory-netty</module>
+ </modules>
+
+</project>
diff --git a/src/arrow/java/memory/src/test/java/io/netty/buffer/TestExpandableByteBuf.java b/src/arrow/java/memory/src/test/java/io/netty/buffer/TestExpandableByteBuf.java
new file mode 100644
index 000000000..b39cca8e8
--- /dev/null
+++ b/src/arrow/java/memory/src/test/java/io/netty/buffer/TestExpandableByteBuf.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.netty.buffer;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestExpandableByteBuf {
+
+ @Test
+ public void testCapacity() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf);
+ ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator);
+ ByteBuf newByteBuf = expandableByteBuf.capacity(31);
+ int capacity = newByteBuf.capacity();
+ Assert.assertEquals(32, capacity);
+ }
+ }
+
+ @Test
+ public void testCapacity1() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf);
+ ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator);
+ ByteBuf newByteBuf = expandableByteBuf.capacity(32);
+ int capacity = newByteBuf.capacity();
+ Assert.assertEquals(32, capacity);
+ }
+ }
+
+ @Test
+ public void testSetAndGetIntValues() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf);
+ ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator);
+ int [] intVals = new int[] {Integer.MIN_VALUE, Short.MIN_VALUE - 1, Short.MIN_VALUE, 0 ,
+ Short.MAX_VALUE , Short.MAX_VALUE + 1, Integer.MAX_VALUE};
+ for (int intValue :intVals) {
+ expandableByteBuf.setInt(0, intValue);
+ Assert.assertEquals(expandableByteBuf.getInt(0), intValue);
+ Assert.assertEquals(expandableByteBuf.getIntLE(0), Integer.reverseBytes(intValue));
+ }
+ }
+ }
+
+ @Test
+ public void testSetAndGetLongValues() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf);
+ ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator);
+ long [] longVals = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE};
+ for (long longValue :longVals) {
+ expandableByteBuf.setLong(0, longValue);
+ Assert.assertEquals(expandableByteBuf.getLong(0), longValue);
+ Assert.assertEquals(expandableByteBuf.getLongLE(0), Long.reverseBytes(longValue));
+ }
+ }
+ }
+
+ @Test
+ public void testSetAndGetShortValues() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf);
+ ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator);
+ short [] shortVals = new short[] {Short.MIN_VALUE, 0 , Short.MAX_VALUE};
+ for (short shortValue :shortVals) {
+ expandableByteBuf.setShort(0, shortValue);
+ Assert.assertEquals(expandableByteBuf.getShort(0), shortValue);
+ Assert.assertEquals(expandableByteBuf.getShortLE(0), Short.reverseBytes(shortValue));
+ }
+ }
+ }
+
+ @Test
+ public void testSetAndGetByteValues() {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(20);
+ ) {
+ NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf);
+ ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator);
+ byte [] byteVals = new byte[] {Byte.MIN_VALUE, 0 , Byte.MAX_VALUE};
+ for (short byteValue :byteVals) {
+ expandableByteBuf.setByte(0, byteValue);
+ Assert.assertEquals(expandableByteBuf.getByte(0), byteValue);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/performance/pom.xml b/src/arrow/java/performance/pom.xml
new file mode 100644
index 000000000..c2be88c8a
--- /dev/null
+++ b/src/arrow/java/performance/pom.xml
@@ -0,0 +1,233 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>arrow-java-root</artifactId>
+ <groupId>org.apache.arrow</groupId>
+ <version>6.0.1</version>
+ </parent>
+ <artifactId>arrow-performance</artifactId>
+ <packaging>jar</packaging>
+ <name>Arrow Performance Benchmarks</name>
+ <description>JMH Performance benchmarks for other Arrow libraries.</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.openjdk.jmh</groupId>
+ <artifactId>jmh-core</artifactId>
+ <version>${jmh.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.openjdk.jmh</groupId>
+ <artifactId>jmh-generator-annprocess</artifactId>
+ <version>${jmh.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j-impl</artifactId>
+ <version>2.1</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-core</artifactId>
+ <version>2.1</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ <version>${dep.avro.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-avro</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.h2database</groupId>
+ <artifactId>h2</artifactId>
+ <version>1.4.196</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-jdbc</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-algorithm</artifactId>
+ <version>6.0.1</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <jmh.version>1.21</jmh.version>
+ <javac.target>1.8</javac.target>
+ <uberjar.name>benchmarks</uberjar.name>
+ <skip.perf.benchmarks>true</skip.perf.benchmarks>
+ <benchmark.filter>.*</benchmark.filter>
+ <benchmark.forks>1</benchmark.forks>
+ <benchmark.jvmargs> </benchmark.jvmargs>
+ <benchmark.warmups>5</benchmark.warmups>
+ <benchmark.runs>5</benchmark.runs>
+ <benchmark.list> </benchmark.list>
+ <benchmark.resultfile>jmh-result.json</benchmark.resultfile>
+ <benchmark.resultformat>json</benchmark.resultformat>
+ </properties>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration combine.self="override">
+ <compilerVersion>${javac.target}</compilerVersion>
+ <source>${javac.target}</source>
+ <target>${javac.target}</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>2.2</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <finalName>${uberjar.name}</finalName>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <mainClass>org.openjdk.jmh.Main</mainClass>
+ </transformer>
+ </transformers>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>exec-maven-plugin</artifactId>
+ <version>1.6.0</version>
+ <executions>
+ <execution>
+ <id>run-java-benchmarks</id>
+ <phase>integration-test</phase>
+ <goals>
+ <goal>exec</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <skip>${skip.perf.benchmarks}</skip>
+ <classpathScope>test</classpathScope>
+ <executable>java</executable>
+ <arguments>
+ <argument>-classpath</argument>
+ <classpath />
+ <argument>org.openjdk.jmh.Main</argument>
+ <argument>${benchmark.filter}</argument>
+ <argument>-f</argument>
+ <argument>${benchmark.forks}</argument>
+ <argument>-jvmArgs</argument>
+ <argument>${benchmark.jvmargs}</argument>
+ <argument>-wi</argument>
+ <argument>${benchmark.warmups}</argument>
+ <argument>-i</argument>
+ <argument>${benchmark.runs}</argument>
+ <argument>${benchmark.list}</argument>
+ <argument>-rff</argument>
+ <argument>${benchmark.resultfile}</argument>
+ <argument>-rf</argument>
+ <argument>${benchmark.resultformat}</argument>
+ </arguments>
+ </configuration>
+ </plugin>
+ </plugins>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <artifactId>maven-clean-plugin</artifactId>
+ <version>2.5</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-deploy-plugin</artifactId>
+ <version>2.8.1</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-install-plugin</artifactId>
+ <version>2.5.1</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.4</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>2.9.1</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.6</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-site-plugin</artifactId>
+ <version>3.3</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-source-plugin</artifactId>
+ <version>2.2.1</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>3.0.0-M3</version>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+</project>
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java
new file mode 100644
index 000000000..884647b5a
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.AvroToArrow;
+import org.apache.arrow.AvroToArrowConfig;
+import org.apache.arrow.AvroToArrowConfigBuilder;
+import org.apache.arrow.AvroToArrowVectorIterator;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.io.Decoder;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.EncoderFactory;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for avro adapter.
+ */
+@State(Scope.Benchmark)
+public class AvroAdapterBenchmarks {
+
+ private final int valueCount = 3000;
+
+ private AvroToArrowConfig config;
+
+ private Schema schema;
+ private BinaryDecoder decoder;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() throws Exception {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ config = new AvroToArrowConfigBuilder(allocator).build();
+
+ String schemaStr = "{\n" + " \"namespace\": \"org.apache.arrow.avro\",\n" +
+ " \"type\": \"record\",\n" + " \"name\": \"testBenchmark\",\n" + " \"fields\": [\n" +
+ " {\"name\": \"f0\", \"type\": \"string\"},\n" +
+ " {\"name\": \"f1\", \"type\": \"int\"},\n" +
+ " {\"name\": \"f2\", \"type\": \"long\"},\n" +
+ " {\"name\": \"f3\", \"type\": \"boolean\"},\n" +
+ " {\"name\": \"f4\", \"type\": \"float\"}\n" + " ]\n" + "}";
+ schema = new Schema.Parser().parse(schemaStr);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ BinaryEncoder encoder = new EncoderFactory().directBinaryEncoder(out, null);
+ DatumWriter writer = new GenericDatumWriter(schema);
+
+ for (int i = 0; i < valueCount; i++) {
+ GenericRecord record = new GenericData.Record(schema);
+ record.put(0, "test" + i);
+ record.put(1, i);
+ record.put(2, i + 1L);
+ record.put(3, i % 2 == 0);
+ record.put(4, i + 0.1f);
+ writer.write(record, encoder);
+ }
+
+ decoder = new DecoderFactory().directBinaryDecoder(new ByteArrayInputStream(out.toByteArray()), null);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ config.getAllocator().close();
+ }
+
+ /**
+ * Test {@link AvroToArrow#avroToArrowIterator(Schema, Decoder, AvroToArrowConfig)}.
+ * @return useless. To avoid DCE by JIT.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public int testAvroToArrow() throws Exception {
+ decoder.inputStream().reset();
+ int sum = 0;
+ try (AvroToArrowVectorIterator iter = AvroToArrow.avroToArrowIterator(schema, decoder, config)) {
+ while (iter.hasNext()) {
+ VectorSchemaRoot root = iter.next();
+ IntVector intVector = (IntVector) root.getVector("f1");
+ for (int i = 0; i < intVector.getValueCount(); i++) {
+ sum += intVector.get(i);
+ }
+ root.close();
+ }
+ }
+ return sum;
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(AvroAdapterBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java
new file mode 100644
index 000000000..fd3940b4c
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java
@@ -0,0 +1,359 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.adapter.jdbc;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.BitConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.IntConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
+import org.apache.arrow.adapter.jdbc.consumer.VarCharConsumer;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for Jdbc adapter.
+ */
+public class JdbcAdapterBenchmarks {
+
+ private static final int VALUE_COUNT = 3000;
+
+ private static final String CREATE_STATEMENT =
+ "CREATE TABLE test_table (f0 INT, f1 LONG, f2 VARCHAR, f3 BOOLEAN);";
+ private static final String INSERT_STATEMENT =
+ "INSERT INTO test_table (f0, f1, f2, f3) VALUES (?, ?, ?, ?);";
+ private static final String QUERY = "SELECT f0, f1, f2, f3 FROM test_table;";
+ private static final String DROP_STATEMENT = "DROP TABLE test_table;";
+
+ private static final String URL = "jdbc:h2:mem:JdbcAdapterBenchmarks";
+ private static final String DRIVER = "org.h2.Driver";
+
+ /**
+ * State object for the jdbc e2e benchmark.
+ */
+ @State(Scope.Benchmark)
+ public static class JdbcState {
+
+ private Connection conn = null;
+
+ private ResultSet resultSet = null;
+
+ private BufferAllocator allocator;
+
+ private Statement statement;
+
+ private JdbcToArrowConfig config;
+
+ @Setup(Level.Trial)
+ public void prepareState() throws Exception {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ config = new JdbcToArrowConfigBuilder().setAllocator(allocator).setTargetBatchSize(1024).build();
+ Class.forName(DRIVER);
+ conn = DriverManager.getConnection(URL);
+
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(CREATE_STATEMENT);
+ }
+
+ for (int i = 0; i < VALUE_COUNT; i++) {
+ // Insert data
+ try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) {
+
+ stmt.setInt(1, i);
+ stmt.setLong(2, i);
+ stmt.setString(3, "test" + i);
+ stmt.setBoolean(4, i % 2 == 0);
+ stmt.executeUpdate();
+ }
+ }
+ }
+
+ @Setup(Level.Invocation)
+ public void prepareInvoke() throws Exception {
+ statement = conn.createStatement();
+ resultSet = statement.executeQuery(QUERY);
+ }
+
+ @TearDown(Level.Invocation)
+ public void tearDownInvoke() throws Exception {
+ resultSet.close();
+ statement.close();
+ }
+
+ @TearDown(Level.Trial)
+ public void tearDownState() throws Exception {
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(DROP_STATEMENT);
+ }
+ allocator.close();
+ }
+ }
+
+ /**
+ * State object for the consume benchmark.
+ */
+ @State(Scope.Benchmark)
+ public static class ConsumeState {
+
+ private static final boolean NULLABLE = true;
+
+ private Connection conn = null;
+
+ private ResultSet resultSet = null;
+
+ private BufferAllocator allocator;
+
+ private Statement statement;
+
+ private IntVector intVector;
+
+ private BigIntVector longVector;
+
+ private VarCharVector varCharVector;
+
+ private BitVector bitVector;
+
+ private JdbcConsumer<IntVector> intConsumer;
+
+ private JdbcConsumer<BigIntVector> longConsumer;
+
+ private JdbcConsumer<VarCharVector> varCharConsumer;
+
+ private JdbcConsumer<BitVector> bitConsumer;
+
+ private JdbcToArrowConfig config;
+
+ @Setup(Level.Trial)
+ public void prepare() throws Exception {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ config = new JdbcToArrowConfigBuilder().setAllocator(allocator).setTargetBatchSize(1024).build();
+
+ Class.forName(DRIVER);
+ conn = DriverManager.getConnection(URL);
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(CREATE_STATEMENT);
+ }
+
+ for (int i = 0; i < VALUE_COUNT; i++) {
+ // Insert data
+ try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) {
+
+ stmt.setInt(1, i);
+ stmt.setLong(2, i);
+ stmt.setString(3, "test" + i);
+ stmt.setBoolean(4, i % 2 == 0);
+ stmt.executeUpdate();
+ }
+ }
+
+ statement = conn.createStatement();
+ resultSet = statement.executeQuery(QUERY);
+ resultSet.next();
+
+ intVector = new IntVector("", allocator);
+ intVector.allocateNew(VALUE_COUNT);
+ intConsumer = IntConsumer.createConsumer(intVector, 1, NULLABLE);
+
+ longVector = new BigIntVector("", allocator);
+ longVector.allocateNew(VALUE_COUNT);
+ longConsumer = BigIntConsumer.createConsumer(longVector, 2, NULLABLE);
+
+ varCharVector = new VarCharVector("", allocator);
+ varCharVector.allocateNew(VALUE_COUNT);
+ varCharConsumer = VarCharConsumer.createConsumer(varCharVector, 3, NULLABLE);
+
+ bitVector = new BitVector("", allocator);
+ bitVector.allocateNew(VALUE_COUNT);
+ bitConsumer = BitConsumer.createConsumer(bitVector, 4, NULLABLE);
+ }
+
+ @TearDown(Level.Trial)
+ public void tearDown() throws Exception {
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(DROP_STATEMENT);
+ }
+
+ resultSet.close();
+ statement.close();
+ conn.close();
+
+ intVector.close();
+ intConsumer.close();
+
+ longVector.close();
+ longConsumer.close();
+
+ varCharVector.close();
+ varCharConsumer.close();
+
+ bitVector.close();
+ bitConsumer.close();
+
+ allocator.close();
+ }
+ }
+
+ /**
+ * State object for the jdbc row consume benchmark.
+ */
+ @State(Scope.Benchmark)
+ public static class RowConsumeState {
+
+ private Connection conn = null;
+
+ private ResultSet resultSet = null;
+
+ private BufferAllocator allocator;
+
+ private Statement statement;
+
+ private JdbcToArrowConfig config;
+
+ private ArrowVectorIterator iter;
+
+ private VectorSchemaRoot root;
+
+ @Setup(Level.Trial)
+ public void prepareState() throws Exception {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ config = new JdbcToArrowConfigBuilder().setAllocator(allocator).setTargetBatchSize(VALUE_COUNT).build();
+ Class.forName(DRIVER);
+ conn = DriverManager.getConnection(URL);
+
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(CREATE_STATEMENT);
+ }
+
+ for (int i = 0; i < VALUE_COUNT; i++) {
+ // Insert data
+ try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) {
+
+ stmt.setInt(1, i);
+ stmt.setLong(2, i);
+ stmt.setString(3, "test" + i);
+ stmt.setBoolean(4, i % 2 == 0);
+ stmt.executeUpdate();
+ }
+ }
+ }
+
+ @Setup(Level.Invocation)
+ public void prepareInvoke() throws Exception {
+ statement = conn.createStatement();
+ resultSet = statement.executeQuery(QUERY);
+
+ iter = JdbcToArrow.sqlToArrowVectorIterator(resultSet, config);
+ root = iter.next();
+ iter.compositeConsumer.resetVectorSchemaRoot(root);
+ }
+
+ @TearDown(Level.Invocation)
+ public void tearDownInvoke() throws Exception {
+ resultSet.close();
+ statement.close();
+ iter.close();
+ }
+
+ @TearDown(Level.Trial)
+ public void tearDownState() throws Exception {
+ try (Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(DROP_STATEMENT);
+ }
+ allocator.close();
+ }
+ }
+
+ /**
+ * Test {@link JdbcToArrow#sqlToArrowVectorIterator(ResultSet, JdbcToArrowConfig)}.
+ * @return useless. To avoid DCE by JIT.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public int testJdbcToArrow(JdbcState state) throws Exception {
+ int valueCount = 0;
+ try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(state.resultSet, state.config)) {
+ while (iter.hasNext()) {
+ VectorSchemaRoot root = iter.next();
+ IntVector intVector = (IntVector) root.getFieldVectors().get(0);
+ valueCount += intVector.getValueCount();
+ root.close();
+ }
+ }
+ return valueCount;
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void consumeBenchmark(ConsumeState state) throws Exception {
+ state.intConsumer.resetValueVector(state.intVector);
+ state.longConsumer.resetValueVector(state.longVector);
+ state.varCharConsumer.resetValueVector(state.varCharVector);
+ state.bitConsumer.resetValueVector(state.bitVector);
+ for (int i = 0; i < VALUE_COUNT; i++) {
+ state.intConsumer.consume(state.resultSet);
+ state.longConsumer.consume(state.resultSet);
+ state.varCharConsumer.consume(state.resultSet);
+ state.bitConsumer.consume(state.resultSet);
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void consumeRowsBenchmark(RowConsumeState state) throws Exception {
+ for (int i = 0; i < VALUE_COUNT; i++) {
+ state.iter.compositeConsumer.consume(state.resultSet);
+ }
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(JdbcAdapterBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
+
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java
new file mode 100644
index 000000000..1c3af77e7
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.search;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link ParallelSearcher}.
+ */
+public class ParallelSearcherBenchmarks {
+
+ private static final int VECTOR_LENGTH = 1024 * 1024;
+
+ /**
+ * State object for the benchmarks.
+ */
+ @State(Scope.Benchmark)
+ public static class SearchState {
+
+ @Param({"1", "2", "5", "10", "20", "50", "100"})
+ int numThreads;
+
+ BufferAllocator allocator;
+
+ ExecutorService threadPool;
+
+ IntVector targetVector;
+
+ IntVector keyVector;
+
+ ParallelSearcher<IntVector> searcher;
+
+ @Setup(Level.Trial)
+ public void prepare() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ targetVector = new IntVector("target vector", allocator);
+ targetVector.allocateNew(VECTOR_LENGTH);
+ keyVector = new IntVector("key vector", allocator);
+ keyVector.allocateNew(1);
+ threadPool = Executors.newFixedThreadPool(numThreads);
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ targetVector.set(i, i);
+ }
+ targetVector.setValueCount(VECTOR_LENGTH);
+
+ keyVector.set(0, VECTOR_LENGTH / 3);
+ keyVector.setValueCount(1);
+ }
+
+ @Setup(Level.Invocation)
+ public void prepareInvoke() {
+ searcher = new ParallelSearcher<>(targetVector, threadPool, numThreads);
+ }
+
+ @TearDown(Level.Trial)
+ public void tearDownState() {
+ targetVector.close();
+ keyVector.close();
+ allocator.close();
+ threadPool.shutdown();
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void searchBenchmark(SearchState state) throws Exception {
+ state.searcher.search(state.keyVector, 0);
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(ParallelSearcherBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/AllocatorBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/AllocatorBenchmarks.java
new file mode 100644
index 000000000..88fcf73f0
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/AllocatorBenchmarks.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.rounding.RoundingPolicy;
+import org.apache.arrow.memory.rounding.SegmentRoundingPolicy;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for allocators.
+ */
+public class AllocatorBenchmarks {
+
+ /**
+ * Benchmark for the default allocator.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void defaultAllocatorBenchmark() {
+ final int bufferSize = 1024;
+ final int numBuffers = 1024;
+
+ try (RootAllocator allocator = new RootAllocator(numBuffers * bufferSize)) {
+ ArrowBuf[] buffers = new ArrowBuf[numBuffers];
+
+ for (int i = 0; i < numBuffers; i++) {
+ buffers[i] = allocator.buffer(bufferSize);
+ }
+
+ for (int i = 0; i < numBuffers; i++) {
+ buffers[i].close();
+ }
+ }
+ }
+
+ /**
+ * Benchmark for allocator with segment rounding policy.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void segmentRoundingPolicyBenchmark() {
+ final int bufferSize = 1024;
+ final int numBuffers = 1024;
+ final int segmentSize = 1024;
+
+ RoundingPolicy policy = new SegmentRoundingPolicy(segmentSize);
+ try (RootAllocator allocator = new RootAllocator(AllocationListener.NOOP, bufferSize * numBuffers, policy)) {
+ ArrowBuf[] buffers = new ArrowBuf[numBuffers];
+
+ for (int i = 0; i < numBuffers; i++) {
+ buffers[i] = allocator.buffer(bufferSize);
+ }
+
+ for (int i = 0; i < numBuffers; i++) {
+ buffers[i].close();
+ }
+ }
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(AllocatorBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/ArrowBufBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/ArrowBufBenchmarks.java
new file mode 100644
index 000000000..ef4da5828
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/ArrowBufBenchmarks.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+import java.util.concurrent.TimeUnit;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link ArrowBuf}.
+ */
+@State(Scope.Benchmark)
+public class ArrowBufBenchmarks {
+
+ private static final int BUFFER_CAPACITY = 1024 * 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private ArrowBuf buffer;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ buffer = allocator.buffer(BUFFER_CAPACITY);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ buffer.close();
+ allocator.close();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public void setZero() {
+ buffer.setZero(0, BUFFER_CAPACITY);
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(ArrowBufBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java
new file mode 100644
index 000000000..8e2c9cc51
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link ArrowBufPointer}.
+ */
+@State(Scope.Benchmark)
+public class ArrowBufPointerBenchmarks {
+
+ private static final int BUFFER_CAPACITY = 1000;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private ArrowBuf buffer1;
+
+ private ArrowBuf buffer2;
+
+ private ArrowBufPointer pointer1;
+
+ private ArrowBufPointer pointer2;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ buffer1 = allocator.buffer(BUFFER_CAPACITY);
+ buffer2 = allocator.buffer(BUFFER_CAPACITY);
+
+ for (int i = 0; i < BUFFER_CAPACITY; i++) {
+ buffer1.setByte(i, i);
+ buffer2.setByte(i, i);
+ }
+
+ // make the last bytes different
+ buffer1.setByte(BUFFER_CAPACITY - 1, 12);
+ buffer1.setByte(BUFFER_CAPACITY - 1, 123);
+
+ pointer1 = new ArrowBufPointer(buffer1, 0, BUFFER_CAPACITY);
+ pointer2 = new ArrowBufPointer(buffer2, 0, BUFFER_CAPACITY);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ buffer1.close();
+ buffer2.close();
+ allocator.close();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public int compareBenchmark() {
+ return pointer1.compareTo(pointer2);
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(ArrowBufPointerBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
+
+
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java
new file mode 100644
index 000000000..4d0dfcb5d
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory.util;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link ByteFunctionHelpers}.
+ */
+public class ByteFunctionHelpersBenchmarks {
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ /**
+ * State object for the {@link ByteFunctionHelpersBenchmarks#arrowBufEquals(ArrowEqualState)} benchmark.
+ */
+ @State(Scope.Benchmark)
+ public static class ArrowEqualState {
+
+ private static final int BUFFER_CAPACITY = 7;
+
+ private BufferAllocator allocator;
+
+ private ArrowBuf buffer1;
+
+ private ArrowBuf buffer2;
+
+ @Setup(Level.Trial)
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ buffer1 = allocator.buffer(BUFFER_CAPACITY);
+ buffer2 = allocator.buffer(BUFFER_CAPACITY);
+
+ for (int i = 0; i < BUFFER_CAPACITY; i++) {
+ buffer1.setByte(i, i);
+ buffer2.setByte(i, i);
+ }
+ }
+
+ @TearDown(Level.Trial)
+ public void tearDown() {
+ buffer1.close();
+ buffer2.close();
+ allocator.close();
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public void arrowBufEquals(ArrowEqualState state) {
+ ByteFunctionHelpers.equal(state.buffer1, 0, ArrowEqualState.BUFFER_CAPACITY - 1,
+ state.buffer2, 0, ArrowEqualState.BUFFER_CAPACITY - 1);
+ }
+
+ /**
+ * State object for the {@link ByteFunctionHelpersBenchmarks#arrowBufArrayEquals(ArrowArrayEqualState)} benchmark.
+ */
+ @State(Scope.Benchmark)
+ public static class ArrowArrayEqualState {
+
+ private static final int BUFFER_CAPACITY = 1024;
+
+ private BufferAllocator allocator;
+
+ private ArrowBuf buffer1;
+
+ private byte[] buffer2;
+
+ @Setup(Level.Trial)
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ buffer1 = allocator.buffer(BUFFER_CAPACITY);
+ buffer2 = new byte[BUFFER_CAPACITY];
+
+ for (int i = 0; i < BUFFER_CAPACITY; i++) {
+ buffer1.setByte(i, i);
+ buffer2[i] = (byte) i;
+ }
+ }
+
+ @TearDown(Level.Trial)
+ public void tearDown() {
+ buffer1.close();
+ allocator.close();
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public int arrowBufArrayEquals(ArrowArrayEqualState state) {
+ return ByteFunctionHelpers.compare(
+ state.buffer1, 0, ArrowArrayEqualState.BUFFER_CAPACITY,
+ state.buffer2, 0, ArrowArrayEqualState.BUFFER_CAPACITY);
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(ByteFunctionHelpersBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java
new file mode 100644
index 000000000..5d6441cd5
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link BaseValueVector}.
+ */
+@State(Scope.Benchmark)
+public class BaseValueVectorBenchmarks {
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private IntVector vector;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ vector = new IntVector("vector", allocator);
+ vector.allocateNew(VECTOR_LENGTH);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ vector.close();
+ allocator.close();
+ }
+
+ /**
+ * Test {@link BaseValueVector#computeCombinedBufferSize(int, int)}.
+ * @return useless. To avoid DCE by JIT.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public int testComputeCombinedBufferSize() {
+ int totalSize = 0;
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ totalSize += vector.computeCombinedBufferSize(i, 4);
+ }
+ return totalSize;
+ }
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(BaseValueVectorBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+
+
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java
new file mode 100644
index 000000000..5f6e5ca28
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link BitVectorHelper}.
+ */
+public class BitVectorHelperBenchmarks {
+
+ /**
+ * State object for general benchmarks.
+ */
+ @State(Scope.Benchmark)
+ public static class BenchmarkState {
+
+ private static final int VALIDITY_BUFFER_CAPACITY = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private ArrowBuf validityBuffer;
+
+ private ArrowBuf oneBitValidityBuffer;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup(Level.Trial)
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ validityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8);
+
+ for (int i = 0; i < VALIDITY_BUFFER_CAPACITY; i++) {
+ if (i % 7 == 0) {
+ BitVectorHelper.setBit(validityBuffer, i);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, i);
+ }
+ }
+
+ // only one 1 bit in the middle of the buffer
+ oneBitValidityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8);
+ oneBitValidityBuffer.setZero(0, VALIDITY_BUFFER_CAPACITY / 8);
+ BitVectorHelper.setBit(oneBitValidityBuffer, VALIDITY_BUFFER_CAPACITY / 2);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown(Level.Trial)
+ public void tearDown() {
+ validityBuffer.close();
+ oneBitValidityBuffer.close();
+ allocator.close();
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public int getNullCountBenchmark(BenchmarkState state) {
+ return BitVectorHelper.getNullCount(state.validityBuffer, BenchmarkState.VALIDITY_BUFFER_CAPACITY);
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public boolean allBitsNullBenchmark(BenchmarkState state) {
+ return BitVectorHelper.checkAllBitsEqualTo(
+ state.oneBitValidityBuffer, BenchmarkState.VALIDITY_BUFFER_CAPACITY, true);
+ }
+
+ /**
+ * State object for {@link #loadValidityBufferAllOne(NonNullableValidityBufferState)}..
+ */
+ @State(Scope.Benchmark)
+ public static class NonNullableValidityBufferState {
+
+ private static final int VALIDITY_BUFFER_CAPACITY = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private ArrowBuf validityBuffer;
+
+ private ArrowBuf loadResult;
+
+ private ArrowFieldNode fieldNode;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup(Level.Trial)
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ validityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8);
+
+ for (int i = 0; i < VALIDITY_BUFFER_CAPACITY; i++) {
+ BitVectorHelper.setBit(validityBuffer, i);
+ }
+
+ fieldNode = new ArrowFieldNode(VALIDITY_BUFFER_CAPACITY, 0);
+ }
+
+ @TearDown(Level.Invocation)
+ public void tearDownInvoke() {
+ loadResult.close();
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown(Level.Trial)
+ public void tearDown() {
+ validityBuffer.close();
+ allocator.close();
+ }
+ }
+
+ /**
+ * Benchmark for {@link BitVectorHelper#loadValidityBuffer(ArrowFieldNode, ArrowBuf, BufferAllocator)}
+ * when all elements are not null.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public void loadValidityBufferAllOne(NonNullableValidityBufferState state) {
+ state.loadResult = BitVectorHelper.loadValidityBuffer(state.fieldNode, state.validityBuffer, state.allocator);
+ }
+
+ /**
+ * State object for {@link #setValidityBitBenchmark(ClearBitStateState)}.
+ */
+ @State(Scope.Benchmark)
+ public static class ClearBitStateState {
+
+ private static final int VALIDITY_BUFFER_CAPACITY = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private ArrowBuf validityBuffer;
+
+ private int bitToSet = 0;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup(Level.Trial)
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ validityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown(Level.Trial)
+ public void tearDown() {
+ validityBuffer.close();
+ allocator.close();
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void setValidityBitBenchmark(ClearBitStateState state) {
+ for (int i = 0; i < ClearBitStateState.VALIDITY_BUFFER_CAPACITY; i++) {
+ BitVectorHelper.setValidityBit(state.validityBuffer, i, state.bitToSet);
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void setValidityBitToZeroBenchmark(ClearBitStateState state) {
+ for (int i = 0; i < ClearBitStateState.VALIDITY_BUFFER_CAPACITY; i++) {
+ BitVectorHelper.unsetBit(state.validityBuffer, i);
+ }
+ }
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(BitVectorHelperBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java
new file mode 100644
index 000000000..72f565990
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.math.BigDecimal;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link DecimalVector}.
+ */
+@State(Scope.Benchmark)
+public class DecimalVectorBenchmarks {
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private DecimalVector vector;
+
+ private ArrowBuf fromBuf;
+
+ byte[] fromByteArray;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ vector = new DecimalVector("vector", allocator, 38, 16);
+ vector.allocateNew(VECTOR_LENGTH);
+
+ fromBuf = allocator.buffer(VECTOR_LENGTH * DecimalVector.TYPE_WIDTH);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ byte[] bytes = BigDecimal.valueOf(i).unscaledValue().toByteArray();
+ fromBuf.setBytes(i * DecimalVector.TYPE_WIDTH, bytes);
+ }
+
+ fromByteArray = new byte[DecimalVector.TYPE_WIDTH];
+ fromBuf.getBytes(0, fromByteArray);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ fromBuf.close();
+ vector.close();
+ allocator.close();
+ }
+
+ /**
+ * Test writing on {@link DecimalVector} from arrow buf.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void setBigEndianArrowBufBenchmark() {
+ int offset = 0;
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ vector.setBigEndianSafe(i, offset, fromBuf, DecimalVector.TYPE_WIDTH);
+ offset += 8;
+ }
+ }
+
+ /**
+ * Test writing on {@link DecimalVector} from byte array.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void setBigEndianByteArrayBenchmark() {
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ vector.setBigEndian(i, fromByteArray);
+ }
+ }
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(DecimalVectorBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java
new file mode 100644
index 000000000..874e0d9f8
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BoundsChecking;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link Float8Vector}.
+ */
+@State(Scope.Benchmark)
+public class Float8Benchmarks {
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private Float8Vector vector;
+
+ private Float8Vector fromVector;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ vector = new Float8Vector("vector", allocator);
+ vector.allocateNew(VECTOR_LENGTH);
+
+ fromVector = new Float8Vector("vector", allocator);
+ fromVector.allocateNew(VECTOR_LENGTH);
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 3 == 0) {
+ fromVector.setNull(i);
+ } else {
+ fromVector.set(i, i * i);
+ }
+ }
+ fromVector.setValueCount(VECTOR_LENGTH);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ vector.close();
+ fromVector.close();
+ allocator.close();
+ }
+
+ /**
+ * Test reading/writing on {@link Float8Vector}.
+ * The performance of this benchmark is influenced by the states of two flags:
+ * 1. The flag for boundary checking. For details, please see {@link BoundsChecking}.
+ * 2. The flag for null checking in get methods. For details, please see {@link NullCheckingForGet}.
+ * @return useless. To avoid DCE by JIT.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public double readWriteBenchmark() {
+ double sum = 0;
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ vector.set(i, i + 10.0);
+ sum += vector.get(i);
+ }
+ return sum;
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void copyFromBenchmark() {
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ vector.copyFrom(i, i, (Float8Vector) fromVector);
+ }
+ }
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(Float8Benchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java
new file mode 100644
index 000000000..079672e9f
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.compare.ApproxEqualsVisitor;
+import org.apache.arrow.vector.compare.Range;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for floating point vectors.
+ */
+@State(Scope.Benchmark)
+public class FloatingPointBenchmarks {
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private Float4Vector floatVector1;
+
+ private Float4Vector floatVector2;
+
+ private Float8Vector doubleVector1;
+
+ private Float8Vector doubleVector2;
+
+ private ApproxEqualsVisitor floatVisitor;
+
+ private ApproxEqualsVisitor doubleVisitor;
+
+ private Range range;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ floatVector1 = new Float4Vector("vector", allocator);
+ floatVector2 = new Float4Vector("vector", allocator);
+ doubleVector1 = new Float8Vector("vector", allocator);
+ doubleVector2 = new Float8Vector("vector", allocator);
+
+ floatVector1.allocateNew(VECTOR_LENGTH);
+ floatVector2.allocateNew(VECTOR_LENGTH);
+ doubleVector1.allocateNew(VECTOR_LENGTH);
+ doubleVector2.allocateNew(VECTOR_LENGTH);
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 3 == 0) {
+ floatVector1.setNull(i);
+ floatVector2.setNull(i);
+ doubleVector1.setNull(i);
+ doubleVector2.setNull(i);
+ } else {
+ floatVector1.set(i, i * i);
+ floatVector2.set(i, i * i);
+ doubleVector1.set(i, i * i);
+ doubleVector2.set(i, i * i);
+ }
+ }
+ floatVector1.setValueCount(VECTOR_LENGTH);
+ floatVector2.setValueCount(VECTOR_LENGTH);
+ doubleVector1.setValueCount(VECTOR_LENGTH);
+ doubleVector2.setValueCount(VECTOR_LENGTH);
+
+ floatVisitor = new ApproxEqualsVisitor(floatVector1, floatVector2, 0.01f, 0.01);
+ doubleVisitor = new ApproxEqualsVisitor(doubleVector1, doubleVector2, 0.01f, 0.01);
+ range = new Range(0, 0, VECTOR_LENGTH);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ floatVector1.close();
+ floatVector2.close();
+ doubleVector1.close();
+ doubleVector2.close();
+ allocator.close();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public int approxEqualsBenchmark() {
+ boolean floatResult = floatVisitor.visit(floatVector1, range);
+ boolean doubleResult = doubleVisitor.visit(doubleVector1, range);
+ return (floatResult ? 1 : 0) + (doubleResult ? 1 : 0);
+ }
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(FloatingPointBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
+
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java
new file mode 100644
index 000000000..036768d44
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.impl.IntWriterImpl;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link IntVector}.
+ */
+@State(Scope.Benchmark)
+public class IntBenchmarks {
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private IntVector vector;
+
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ vector = new IntVector("vector", allocator);
+ vector.allocateNew(VECTOR_LENGTH);
+ vector.setValueCount(VECTOR_LENGTH);
+ }
+
+ @TearDown
+ public void tearDown() {
+ vector.close();
+ allocator.close();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void setWithValueHolder() {
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = i % 3 == 0 ? 0 : 1;
+ if (holder.isSet == 1) {
+ holder.value = i;
+ }
+ vector.setSafe(i, holder);
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void setIntDirectly() {
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ vector.setSafe(i, i % 3 == 0 ? 0 : 1, i);
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void setWithWriter() {
+ IntWriterImpl writer = new IntWriterImpl(vector);
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 3 != 0) {
+ writer.writeInt(i);
+ }
+ }
+ }
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(IntBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java
new file mode 100644
index 000000000..1ab4b7bc2
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link VarCharVector}.
+ */
+@State(Scope.Benchmark)
+public class VarCharBenchmarks {
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private VarCharVector vector;
+
+ private VarCharVector fromVector;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ vector = new VarCharVector("vector", allocator);
+ vector.allocateNew(ALLOCATOR_CAPACITY / 4, VECTOR_LENGTH);
+
+ fromVector = new VarCharVector("vector", allocator);
+ fromVector.allocateNew(ALLOCATOR_CAPACITY / 4, VECTOR_LENGTH);
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 3 == 0) {
+ fromVector.setNull(i);
+ } else {
+ fromVector.set(i, String.valueOf(i * 1000).getBytes());
+ }
+ }
+ fromVector.setValueCount(VECTOR_LENGTH);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ vector.close();
+ fromVector.close();
+ allocator.close();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void copyFromBenchmark() {
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ vector.copyFrom(i, i, fromVector);
+ }
+ }
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(VarCharBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java
new file mode 100644
index 000000000..7eee981f1
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableVarCharHolder;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link BaseVariableWidthVector}.
+ */
+@State(Scope.Benchmark)
+public class VariableWidthVectorBenchmarks {
+
+ private static final int VECTOR_CAPACITY = 16 * 1024;
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private static byte[] bytes = VariableWidthVectorBenchmarks.class.getName().getBytes();
+ private ArrowBuf arrowBuff;
+
+ private BufferAllocator allocator;
+
+ private VarCharVector vector;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ vector = new VarCharVector("vector", allocator);
+ vector.allocateNew(VECTOR_CAPACITY, VECTOR_LENGTH);
+ arrowBuff = allocator.buffer(VECTOR_LENGTH);
+ arrowBuff.setBytes(0, bytes, 0, bytes.length);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ arrowBuff.close();
+ vector.close();
+ allocator.close();
+ }
+
+ /**
+ * Test {@link BaseVariableWidthVector#getValueCapacity()}.
+ * @return useless. To avoid DCE by JIT.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public int getValueCapacity() {
+ return vector.getValueCapacity();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ public int setSafeFromArray() {
+ for (int i = 0; i < 500; ++i) {
+ vector.setSafe(i * 40, bytes);
+ }
+ return vector.getBufferSize();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ public int setSafeFromNullableVarcharHolder() {
+ NullableVarCharHolder nvch = new NullableVarCharHolder();
+ nvch.buffer = arrowBuff;
+ nvch.start = 0;
+ nvch.end = bytes.length;
+ for (int i = 0; i < 50; ++i) {
+ nvch.isSet = 0;
+ for (int j = 0; j < 9; ++j) {
+ int idx = 10 * i + j;
+ vector.setSafe(idx, nvch);
+ }
+ nvch.isSet = 1;
+ vector.setSafe(10 * (i + 1), nvch);
+ }
+ return vector.getBufferSize();
+ }
+
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(VariableWidthVectorBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java
new file mode 100644
index 000000000..416d12641
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link VectorLoader}.
+ */
+public class VectorLoaderBenchmark {
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private static final int VECTOR_COUNT = 10;
+
+ /**
+ * State for vector load benchmark.
+ */
+ @State(Scope.Benchmark)
+ public static class LoadState {
+
+ private BufferAllocator allocator;
+
+ private VarCharVector[] vectors;
+
+ private ArrowRecordBatch recordBatch;
+
+ private VectorSchemaRoot root;
+
+ private VectorLoader loader;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup(Level.Trial)
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ }
+
+ @Setup(Level.Invocation)
+ public void prepareInvoke() {
+ vectors = new VarCharVector[VECTOR_COUNT];
+ for (int i = 0; i < VECTOR_COUNT; i++) {
+ vectors[i] = new VarCharVector("vector", allocator);
+ vectors[i].allocateNew(100, 10);
+ }
+
+ root = VectorSchemaRoot.of(vectors);
+ VectorUnloader unloader = new VectorUnloader(root);
+ recordBatch = unloader.getRecordBatch();
+
+ loader = new VectorLoader(root);
+ }
+
+ @TearDown(Level.Invocation)
+ public void tearDownInvoke() {
+ recordBatch.close();
+ root.close();
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown(Level.Trial)
+ public void tearDown() {
+ allocator.close();
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void loadBenchmark(LoadState state) {
+ state.loader.load(state.recordBatch);
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(VectorLoaderBenchmark.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java
new file mode 100644
index 000000000..d12517245
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link VectorUnloader}.
+ */
+@State(Scope.Benchmark)
+public class VectorUnloaderBenchmark {
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private static final int VECTOR_COUNT = 10;
+
+ private BufferAllocator allocator;
+
+ private VarCharVector [] vectors;
+
+ private VectorUnloader unloader;
+
+ private ArrowRecordBatch recordBatch;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup(Level.Trial)
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ }
+
+ @Setup(Level.Invocation)
+ public void prepareInvoke() {
+ vectors = new VarCharVector[VECTOR_COUNT];
+ for (int i = 0; i < VECTOR_COUNT; i++) {
+ vectors[i] = new VarCharVector("vector", allocator);
+ vectors[i].allocateNew(100, 10);
+ }
+
+ unloader = new VectorUnloader(VectorSchemaRoot.of(vectors));
+ }
+
+ @TearDown(Level.Invocation)
+ public void tearDownInvoke() {
+ if (recordBatch != null) {
+ recordBatch.close();
+ }
+ for (int i = 0; i < VECTOR_COUNT; i++) {
+ vectors[i].close();
+ }
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown(Level.Trial)
+ public void tearDown() {
+ allocator.close();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void unloadBenchmark() {
+ recordBatch = unloader.getRecordBatch();
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(VectorUnloaderBenchmark.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java
new file mode 100644
index 000000000..6dd887a32
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link DictionaryEncoder}.
+ */
+@State(Scope.Benchmark)
+public class DictionaryEncoderBenchmarks {
+
+ private BufferAllocator allocator;
+
+ private static final int DATA_SIZE = 1000;
+ private static final int KEY_SIZE = 100;
+
+
+ private static final int KEY_LENGTH = 10;
+
+ private List<String> keys = new ArrayList<>();
+
+ private VarCharVector vector;
+
+ private VarCharVector dictionaryVector;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+
+ for (int i = 0; i < KEY_SIZE; i++) {
+ keys.add(generateUniqueKey(KEY_LENGTH));
+ }
+
+ allocator = new RootAllocator(10 * 1024 * 1024);
+
+ vector = new VarCharVector("vector", allocator);
+ dictionaryVector = new VarCharVector("dict", allocator);
+
+ vector.allocateNew(10240, DATA_SIZE);
+ vector.setValueCount(DATA_SIZE);
+ for (int i = 0; i < DATA_SIZE; i++) {
+ byte[] value = keys.get(generateRandomIndex(KEY_SIZE)).getBytes(StandardCharsets.UTF_8);
+ vector.setSafe(i, value, 0, value.length);
+ }
+
+ dictionaryVector.allocateNew(1024, 100);
+ dictionaryVector.setValueCount(100);
+ for (int i = 0; i < KEY_SIZE; i++) {
+ byte[] value = keys.get(i).getBytes(StandardCharsets.UTF_8);
+ dictionaryVector.setSafe(i, value, 0, value.length);
+ }
+
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ vector.close();
+ dictionaryVector.close();
+ keys.clear();
+ allocator.close();
+ }
+
+ /**
+ * Test encode for {@link DictionaryEncoder}.
+ * @return useless. To avoid DCE by JIT.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public int testEncode() {
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary);
+ encoded.close();
+ return 0;
+ }
+
+ private int generateRandomIndex(int max) {
+ Random random = new Random();
+ return random.nextInt(max);
+ }
+
+ private String generateUniqueKey(int length) {
+ String str = "abcdefghijklmnopqrstuvwxyz";
+ Random random = new Random();
+ StringBuffer sb = new StringBuffer();
+ for (int i = 0; i < length; i++) {
+ int number = random.nextInt(26);
+ sb.append(str.charAt(number));
+ }
+ if (keys.contains(sb.toString())) {
+ return generateUniqueKey(length);
+ }
+ return sb.toString();
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(DictionaryEncoderBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java
new file mode 100644
index 000000000..7a2537cbb
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+import java.util.concurrent.TimeUnit;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link WriteChannel}.
+ */
+public class WriteChannelBenchmark {
+
+ /**
+ * State object for align benchmark.
+ */
+ @State(Scope.Benchmark)
+ public static class AlignState {
+
+ private ByteArrayOutputStream baos;
+
+ private WriteChannel writeChannel;
+
+ @Param({"1", "2", "3", "4", "5", "6", "7"})
+ public int alignSize;
+
+ @Setup(Level.Invocation)
+ public void prepareInvoke() throws IOException {
+ baos = new ByteArrayOutputStream(8);
+ writeChannel = new WriteChannel(Channels.newChannel(baos));
+ writeChannel.write(new byte[8 - alignSize]);
+ }
+
+ @TearDown(Level.Invocation)
+ public void tearDownInvoke() throws IOException {
+ writeChannel.close();
+ baos.close();
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public void alignBenchmark(AlignState state) throws IOException {
+ state.writeChannel.align();
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(WriteChannelBenchmark.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java
new file mode 100644
index 000000000..c0882821e
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VarCharVector;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link ArrowRecordBatch}.
+ */
+@State(Scope.Benchmark)
+public class ArrowRecordBatchBenchmarks {
+
+ private static final int VECTOR_CAPACITY = 16 * 1024;
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private VarCharVector vector;
+
+ private List<ArrowFieldNode> nodes;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ vector = new VarCharVector("vector", allocator);
+ vector.allocateNew(VECTOR_CAPACITY, VECTOR_LENGTH);
+
+ nodes = new ArrayList<>();
+ nodes.add(new ArrowFieldNode(VECTOR_LENGTH, 0));
+ nodes.add(new ArrowFieldNode(VECTOR_LENGTH, 0));
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ vector.close();
+ allocator.close();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public long createAndGetLength() {
+ try (ArrowRecordBatch batch = new ArrowRecordBatch(VECTOR_LENGTH, nodes, vector.getFieldBuffers())) {
+ return batch.computeBodyLength();
+ }
+ }
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(ArrowRecordBatchBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java
new file mode 100644
index 000000000..235eca53c
--- /dev/null
+++ b/src/arrow/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link TransferPair}.
+ */
+@State(Scope.Benchmark)
+public class TransferPairBenchmarks {
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private IntVector intVector;
+
+ private VarCharVector varCharVector;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ intVector = new IntVector("intVector", allocator);
+ varCharVector = new VarCharVector("varcharVector", allocator);
+
+ intVector.allocateNew(VECTOR_LENGTH);
+ varCharVector.allocateNew(VECTOR_LENGTH);
+
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i % 3 == 0) {
+ intVector.setNull(i);
+ varCharVector.setNull(i);
+ } else {
+ intVector.setSafe(i, i * i);
+ varCharVector.setSafe(i, ("teststring" + i).getBytes(StandardCharsets.UTF_8));
+ }
+ }
+ intVector.setValueCount(VECTOR_LENGTH);
+ varCharVector.setValueCount(VECTOR_LENGTH);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ intVector.close();
+ varCharVector.close();;
+ allocator.close();
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public int splitAndTransferIntVector() {
+ IntVector toVector = new IntVector("intVector", allocator);
+ toVector.setValueCount(VECTOR_LENGTH);
+ TransferPair transferPair = intVector.makeTransferPair(toVector);
+ transferPair.splitAndTransfer(0, VECTOR_LENGTH);
+ toVector.close();
+ return 0;
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public int splitAndTransferVarcharVector() {
+ VarCharVector toVector = new VarCharVector("varcharVector", allocator);
+ toVector.setValueCount(VECTOR_LENGTH);
+ TransferPair transferPair = varCharVector.makeTransferPair(toVector);
+ transferPair.splitAndTransfer(0, VECTOR_LENGTH);
+ toVector.close();
+ return 0;
+ }
+
+ public static void main(String [] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(TransferPairBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+}
diff --git a/src/arrow/java/plasma/README.md b/src/arrow/java/plasma/README.md
new file mode 100644
index 000000000..0dcb4e21f
--- /dev/null
+++ b/src/arrow/java/plasma/README.md
@@ -0,0 +1,39 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Java Plasma Client
+
+## Setup Build Environment
+
+Install:
+ - java 8 or later
+ - maven 3.3 or later
+ - the same requirement of build [Arrow C++](https://github.com/apache/arrow/tree/master/cpp)
+
+## Build the jar of plasma client
+
+```
+cd ..
+mvn clean install -pl plasma -am -Dmaven.test.skip
+```
+
+## Building and running tests
+```
+./test.sh
+```
diff --git a/src/arrow/java/plasma/pom.xml b/src/arrow/java/plasma/pom.xml
new file mode 100644
index 000000000..725b414b1
--- /dev/null
+++ b/src/arrow/java/plasma/pom.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ </parent>
+ <artifactId>arrow-plasma</artifactId>
+ <name>Arrow Plasma Client</name>
+ <description>(Experimental/Contrib) Java client for the Plasma object store.</description>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>1.8</source>
+ <target>1.8</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/ObjectStoreLink.java b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/ObjectStoreLink.java
new file mode 100644
index 000000000..93a2d483c
--- /dev/null
+++ b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/ObjectStoreLink.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.plasma;
+
+import java.util.List;
+
+import org.apache.arrow.plasma.exceptions.DuplicateObjectException;
+import org.apache.arrow.plasma.exceptions.PlasmaOutOfMemoryException;
+
+/**
+ * Object store interface, which provides the capabilities to put and get raw byte array, and serves.
+ */
+public interface ObjectStoreLink {
+
+ /**
+ * Tuple for data and metadata stored in Plasma.
+ */
+ class ObjectStoreData {
+
+ public ObjectStoreData(byte[] metadata, byte[] data) {
+ this.data = data;
+ this.metadata = metadata;
+ }
+
+ public final byte[] metadata;
+ public final byte[] data;
+ }
+
+ /**
+ * Put value in the local plasma store with object ID <tt>objectId</tt>.
+ *
+ * @param objectId The object ID of the value to be put.
+ * @param value The value to put in the object store.
+ * @param metadata encodes whatever metadata the user wishes to encode.
+ */
+ void put(byte[] objectId, byte[] value, byte[] metadata)
+ throws DuplicateObjectException, PlasmaOutOfMemoryException;
+
+ /**
+ * Get a buffer from the PlasmaStore based on the <tt>objectId</tt>.
+ *
+ * @param objectId The object ID used to identify the object.
+ * @param timeoutMs The number of milliseconds that the get call should block before timing out
+ * and returning. Pass -1 if the call should block and 0 if the call should return immediately.
+ * @param isMetadata false if get data, otherwise get metadata.
+ * @return A PlasmaBuffer wrapping the object.
+ */
+ default byte[] get(byte[] objectId, int timeoutMs, boolean isMetadata) {
+ byte[][] objectIds = {objectId};
+ return get(objectIds, timeoutMs, isMetadata).get(0);
+ }
+
+ /**
+ * Get buffers from the PlasmaStore based on <tt>objectIds</tt>.
+ *
+ * @param objectIds List of object IDs used to identify some objects.
+ * @param timeoutMs The number of milliseconds that the get call should block before timing out
+ * and returning. Pass -1 if the call should block and 0 if the call should return immediately.
+ * @param isMetadata false if get data, otherwise get metadata.
+ * @return List of PlasmaBuffers wrapping objects.
+ */
+ List<byte[]> get(byte[][] objectIds, int timeoutMs, boolean isMetadata);
+
+ /**
+ * Get buffer pairs (data & metadata) from the PlasmaStore based on <tt>objectIds</tt>.
+ *
+ * @param objectIds List of object IDs used to identify some objects.
+ * @param timeoutMs The number of milliseconds that the get call should block before timing out
+ * and returning. Pass -1 if the call should block and 0 if the call should return immediately.
+ * @return List of Pairs of PlasmaBuffer wrapping objects and its metadata.
+ */
+ List<ObjectStoreData> get(byte[][] objectIds, int timeoutMs);
+
+ /**
+ * Compute the hash of an object in the object store.
+ *
+ * @param objectId The object ID used to identify the object.
+ * @return A digest byte array contains object's SHA256 hash. <tt>null</tt> means that the object
+ * isn't in the object store.
+ */
+ byte[] hash(byte[] objectId);
+
+ /**
+ * Evict some objects to recover given count of bytes.
+ *
+ * @param numBytes The number of bytes to attempt to recover.
+ * @return The number of bytes that have been evicted.
+ */
+ long evict(long numBytes);
+
+ /**
+ * Release the reference of the object.
+ *
+ * @param objectId The object ID used to release the reference of the object.
+ */
+ void release(byte[] objectId);
+
+ /**
+ * Removes object with given objectId from plasma store.
+ *
+ * @param objectId used to identify an object.
+ */
+ void delete(byte[] objectId);
+
+ /**
+ * Check if the object is present and has been sealed in the PlasmaStore.
+ *
+ * @param objectId used to identify an object.
+ */
+ boolean contains(byte[] objectId);
+
+ /**
+ * List all objects in the PlasmaStore.
+ */
+ List<byte[]> list();
+}
diff --git a/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClient.java b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClient.java
new file mode 100644
index 000000000..fdd7114f1
--- /dev/null
+++ b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClient.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.plasma;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.plasma.exceptions.DuplicateObjectException;
+import org.apache.arrow.plasma.exceptions.PlasmaOutOfMemoryException;
+
+/**
+ * The PlasmaClient is used to interface with a plasma store and manager.
+ *
+ * <p>The PlasmaClient can ask the PlasmaStore to allocate a new buffer, seal a buffer, and get a
+ * buffer. Buffers are referred to by object IDs.
+ */
+public class PlasmaClient implements ObjectStoreLink {
+
+ private final long conn;
+
+ protected void finalize() {
+ PlasmaClientJNI.disconnect(this.conn);
+ }
+
+ // use plasma client to initialize the underlying jni system as well via config and config-overwrites
+ public PlasmaClient(String storeSocketName, String managerSocketName, int releaseDelay) {
+ this.conn = PlasmaClientJNI.connect(storeSocketName, managerSocketName, releaseDelay);
+ }
+
+ // interface methods --------------------
+
+ @Override
+ public void put(byte[] objectId, byte[] value, byte[] metadata)
+ throws DuplicateObjectException, PlasmaOutOfMemoryException {
+ ByteBuffer buf = PlasmaClientJNI.create(conn, objectId, value.length, metadata);
+ buf.put(value);
+ PlasmaClientJNI.seal(conn, objectId);
+ PlasmaClientJNI.release(conn, objectId);
+ }
+
+ @Override
+ public List<byte[]> get(byte[][] objectIds, int timeoutMs, boolean isMetadata) {
+ ByteBuffer[][] bufs = PlasmaClientJNI.get(conn, objectIds, timeoutMs);
+ assert bufs.length == objectIds.length;
+
+ List<byte[]> ret = new ArrayList<>();
+ for (int i = 0; i < bufs.length; i++) {
+ ByteBuffer buf = bufs[i][isMetadata ? 1 : 0];
+ if (buf == null) {
+ ret.add(null);
+ } else {
+ byte[] bb = new byte[buf.remaining()];
+ buf.get(bb);
+ ret.add(bb);
+ }
+ }
+ return ret;
+ }
+
+ @Override
+ public byte[] hash(byte[] objectId) {
+ return PlasmaClientJNI.hash(conn, objectId);
+ }
+
+ @Override
+ public List<ObjectStoreData> get(byte[][] objectIds, int timeoutMs) {
+ ByteBuffer[][] bufs = PlasmaClientJNI.get(conn, objectIds, timeoutMs);
+ assert bufs.length == objectIds.length;
+
+ List<ObjectStoreData> ret = new ArrayList<>();
+ for (int i = 0; i < bufs.length; i++) {
+ ByteBuffer databuf = bufs[i][0];
+ ByteBuffer metabuf = bufs[i][1];
+ if (databuf == null) {
+ ret.add(new ObjectStoreData(null, null));
+ } else {
+ byte[] data = new byte[databuf.remaining()];
+ databuf.get(data);
+ byte[] meta;
+ if (metabuf != null) {
+ meta = new byte[metabuf.remaining()];
+ metabuf.get(meta);
+ } else {
+ meta = null;
+ }
+ ret.add(new ObjectStoreData(meta, data));
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Get an object in Plasma Store with objectId. Will return an off-heap ByteBuffer.
+ *
+ * @param objectId used to identify an object.
+ * @param timeoutMs time in milliseconfs to wait before this request time out.
+ * @param isMetadata get this object's metadata or data.
+ */
+ public ByteBuffer getObjAsByteBuffer(byte[] objectId, int timeoutMs, boolean isMetadata) {
+ byte[][] objectIds = new byte[][]{objectId};
+ ByteBuffer[][] bufs = PlasmaClientJNI.get(conn, objectIds, timeoutMs);
+ return bufs[0][isMetadata ? 1 : 0];
+ }
+
+ @Override
+ public List<byte[]> list() {
+ return Arrays.asList(PlasmaClientJNI.list(conn));
+ }
+
+ @Override
+ public long evict(long numBytes) {
+ return PlasmaClientJNI.evict(conn, numBytes);
+ }
+
+ // wrapper methods --------------------
+
+ /**
+ * Create an object in Plasma Store with particular size. Will return an off-heap ByteBuffer.
+ *
+ * @param objectId used to identify an object.
+ * @param size size in bytes to be allocated for this object.
+ * @param metadata this object's metadata. It should be null if there is no metadata.
+ */
+ public ByteBuffer create(byte[] objectId, int size, byte[] metadata)
+ throws DuplicateObjectException, PlasmaOutOfMemoryException {
+ return PlasmaClientJNI.create(conn, objectId, size, metadata);
+ }
+
+ /**
+ * Seal the buffer in the PlasmaStore for a particular object ID.
+ * Once a buffer has been sealed, the buffer is immutable and can only be accessed through get.
+ *
+ * @param objectId used to identify an object.
+ */
+ public void seal(byte[] objectId) {
+ PlasmaClientJNI.seal(conn, objectId);
+ }
+
+ /**
+ * Notify Plasma that the object is no longer needed.
+ *
+ * @param objectId used to identify an object.
+ */
+ public void release(byte[] objectId) {
+ PlasmaClientJNI.release(conn, objectId);
+ }
+
+ /**
+ * Removes object with given objectId from plasma store.
+ *
+ * @param objectId used to identify an object.
+ */
+ @Override
+ public void delete(byte[] objectId) {
+ PlasmaClientJNI.delete(conn, objectId);
+ }
+
+ /**
+ * Check if the object is present and has been sealed in the PlasmaStore.
+ *
+ * @param objectId used to identify an object.
+ */
+ @Override
+ public boolean contains(byte[] objectId) {
+ return PlasmaClientJNI.contains(conn, objectId);
+ }
+}
diff --git a/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClientJNI.java b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClientJNI.java
new file mode 100644
index 000000000..da5c17e6b
--- /dev/null
+++ b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/PlasmaClientJNI.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.plasma;
+
+import java.nio.ByteBuffer;
+
+import org.apache.arrow.plasma.exceptions.DuplicateObjectException;
+import org.apache.arrow.plasma.exceptions.PlasmaOutOfMemoryException;
+
+/**
+ * JNI static methods for PlasmaClient.
+ */
+public class PlasmaClientJNI {
+
+ public static native long connect(String storeSocketName, String managerSocketName, int releaseDelay);
+
+ public static native void disconnect(long conn);
+
+ public static native ByteBuffer create(long conn, byte[] objectId, int size, byte[] metadata)
+ throws DuplicateObjectException, PlasmaOutOfMemoryException;
+
+ public static native byte[] hash(long conn, byte[] objectId);
+
+ public static native void seal(long conn, byte[] objectId);
+
+ public static native void release(long conn, byte[] objectId);
+
+ public static native ByteBuffer[][] get(long conn, byte[][] objectIds, int timeoutMs);
+
+ public static native void delete(long conn, byte[] objectId);
+
+ public static native boolean contains(long conn, byte[] objectId);
+
+ public static native void fetch(long conn, byte[][] objectIds);
+
+ public static native byte[][] wait(long conn, byte[][] objectIds, int timeoutMs,
+ int numReturns);
+
+ public static native long evict(long conn, long numBytes);
+
+ public static native byte[][] list(long conn);
+}
diff --git a/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/DuplicateObjectException.java b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/DuplicateObjectException.java
new file mode 100644
index 000000000..cb735282c
--- /dev/null
+++ b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/DuplicateObjectException.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.plasma.exceptions;
+
+/**
+ * Thrown when attempting to place an object into the store for an ID that already exists.
+ */
+public class DuplicateObjectException extends RuntimeException {
+
+ public DuplicateObjectException(String objectId) {
+ super("An object with ID " + objectId + " already exists in the plasma store.");
+ }
+
+ public DuplicateObjectException(String objectId, Throwable t) {
+ super("An object with ID " + objectId + " already exists in the plasma store.", t);
+ }
+}
diff --git a/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaClientException.java b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaClientException.java
new file mode 100644
index 000000000..ff9d96b17
--- /dev/null
+++ b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaClientException.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.plasma.exceptions;
+
+/**
+ * Generic exception thrown by the plasma client (for example on failure to connect).
+ */
+public class PlasmaClientException extends RuntimeException {
+
+ public PlasmaClientException(String message) {
+ super(message);
+ }
+
+ public PlasmaClientException(String message, Throwable t) {
+ super(message, t);
+ }
+}
diff --git a/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaOutOfMemoryException.java b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaOutOfMemoryException.java
new file mode 100644
index 000000000..ffc4177eb
--- /dev/null
+++ b/src/arrow/java/plasma/src/main/java/org/apache/arrow/plasma/exceptions/PlasmaOutOfMemoryException.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.plasma.exceptions;
+
+/**
+ * Indicates no more memory is available in Plasma.
+ */
+public class PlasmaOutOfMemoryException extends RuntimeException {
+
+ public PlasmaOutOfMemoryException(String message) {
+ super("The plasma store ran out of memory." + message);
+ }
+
+ public PlasmaOutOfMemoryException(String message, Throwable t) {
+ super("The plasma store ran out of memory." + message, t);
+ }
+
+ public PlasmaOutOfMemoryException() {
+ super("The plasma store ran out of memory.");
+ }
+
+ public PlasmaOutOfMemoryException(Throwable t) {
+ super("The plasma store ran out of memory.", t);
+ }
+}
diff --git a/src/arrow/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java b/src/arrow/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java
new file mode 100644
index 000000000..e02ee51f9
--- /dev/null
+++ b/src/arrow/java/plasma/src/test/java/org/apache/arrow/plasma/PlasmaClientTest.java
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.plasma;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.plasma.exceptions.DuplicateObjectException;
+import org.apache.arrow.plasma.exceptions.PlasmaClientException;
+import org.apache.arrow.plasma.exceptions.PlasmaOutOfMemoryException;
+import org.junit.Assert;
+
+public class PlasmaClientTest {
+
+ private String storeSuffix = "/tmp/store";
+
+ private Process storeProcess;
+
+ private int storePort;
+
+ private ObjectStoreLink pLink;
+
+
+ public PlasmaClientTest() throws Exception {
+ try {
+ String plasmaStorePath = System.getenv("PLASMA_STORE");
+ if (plasmaStorePath == null) {
+ throw new Exception("Please set plasma store path in env PLASMA_STORE");
+ }
+
+ this.startObjectStore(plasmaStorePath);
+ System.loadLibrary("plasma_java");
+ pLink = new PlasmaClient(this.getStoreAddress(), "", 0);
+ } catch (Throwable t) {
+ cleanup();
+ throw t;
+ }
+
+ }
+
+ private Process startProcess(String[] cmd) {
+ ProcessBuilder builder;
+ List<String> newCmd = Arrays.stream(cmd).filter(s -> s.length() > 0).collect(Collectors.toList());
+ builder = new ProcessBuilder(newCmd);
+ builder.inheritIO();
+ Process p = null;
+ try {
+ p = builder.start();
+ } catch (IOException e) {
+ e.printStackTrace();
+ return null;
+ }
+ System.out.println("Start process " + p.hashCode() + " OK, cmd = " + Arrays.toString(cmd).replace(',', ' '));
+ return p;
+ }
+
+ private void startObjectStore(String plasmaStorePath) {
+ int occupiedMemoryMB = 10;
+ long memoryBytes = occupiedMemoryMB * 1000000;
+ int numRetries = 10;
+ Process p = null;
+ while (numRetries-- > 0) {
+ int currentPort = java.util.concurrent.ThreadLocalRandom.current().nextInt(0, 100000);
+ String name = storeSuffix + currentPort;
+ String cmd = plasmaStorePath + " -s " + name + " -m " + memoryBytes;
+
+ p = startProcess(cmd.split(" "));
+
+ if (p != null && p.isAlive()) {
+ try {
+ TimeUnit.MILLISECONDS.sleep(100);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ if (p.isAlive()) {
+ storePort = currentPort;
+ break;
+ }
+ }
+ }
+
+
+ if (p == null || !p.isAlive()) {
+ throw new RuntimeException("Start object store failed ...");
+ } else {
+ storeProcess = p;
+ System.out.println("Start object store success");
+ }
+ }
+
+ private void cleanup() {
+ if (storeProcess != null && killProcess(storeProcess)) {
+ System.out.println("Kill plasma store process forcibly");
+ }
+ }
+
+ private static boolean killProcess(Process p) {
+ if (p.isAlive()) {
+ p.destroyForcibly();
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public void doTest() {
+ System.out.println("Start test.");
+ int timeoutMs = 3000;
+ byte[] id1 = new byte[20];
+ Arrays.fill(id1, (byte) 1);
+ byte[] value1 = new byte[20];
+ Arrays.fill(value1, (byte) 11);
+ pLink.put(id1, value1, null);
+
+ byte[] id2 = new byte[20];
+ Arrays.fill(id2, (byte) 2);
+ byte[] value2 = new byte[20];
+ Arrays.fill(value2, (byte) 12);
+ pLink.put(id2, value2, null);
+ System.out.println("Plasma java client put test success.");
+ byte[] getValue1 = pLink.get(id1, timeoutMs, false);
+ assert Arrays.equals(value1, getValue1);
+
+ byte[] getValue2 = pLink.get(id2, timeoutMs, false);
+ assert Arrays.equals(value2, getValue2);
+ System.out.println("Plasma java client get single object test success.");
+ byte[][] ids = {id1, id2};
+ List<byte[]> values = pLink.get(ids, timeoutMs, false);
+ assert Arrays.equals(values.get(0), value1);
+ assert Arrays.equals(values.get(1), value2);
+ System.out.println("Plasma java client get multi-object test success.");
+ try {
+ pLink.put(id1, value1, null);
+ Assert.fail("Fail to throw DuplicateObjectException when put an object into plasma store twice.");
+ } catch (DuplicateObjectException e) {
+ System.out.println("Plasma java client put same object twice exception test success.");
+ }
+ byte[] id1Hash = pLink.hash(id1);
+ assert id1Hash != null;
+ System.out.println("Plasma java client hash test success.");
+
+ boolean exist = pLink.contains(id2);
+ assert exist;
+ byte[] id3 = new byte[20];
+ Arrays.fill(id3, (byte) 3);
+ boolean notExist = pLink.contains(id3);
+ assert !notExist;
+ System.out.println("Plasma java client contains test success.");
+
+ byte[] id4 = new byte[20];
+ Arrays.fill(id4, (byte) 4);
+ byte[] value4 = new byte[20];
+ byte[] meta4 = "META4".getBytes();
+ Arrays.fill(value4, (byte) 14);
+ pLink.put(id4, value4, meta4);
+
+ List<byte[]> existIds = Arrays.asList(id1, id2, id3, id4);
+ List<byte[]> listIds = pLink.list();
+ assert listIds.size() == 4;
+ for (byte[] existId : existIds) {
+ boolean found = false;
+ for (byte[] listId : listIds) {
+ if (Arrays.equals(listId, existId)) {
+ found = true;
+ }
+ }
+ assert found;
+ }
+ System.out.println("Plasma java client list test success.");
+
+ byte[] id5 = new byte[20];
+ Arrays.fill(id5, (byte) 5);
+ byte[] value5 = new byte[20];
+ byte[] meta5 = "META5".getBytes();
+ Arrays.fill(value5, (byte) 15);
+ pLink.put(id5, value5, meta5);
+
+ byte[] getMeta4 = pLink.get(id4, timeoutMs, true);
+ assert Arrays.equals(meta4, getMeta4);
+ byte[] getValue4 = pLink.get(id4, timeoutMs, false);
+ assert Arrays.equals(value4, getValue4);
+ byte[][] ids4 = new byte[1][];
+ ids4[0] = id4;
+ ObjectStoreLink.ObjectStoreData fullData4 = pLink.get(ids4, timeoutMs).get(0);
+ assert Arrays.equals(meta4, fullData4.metadata);
+ assert Arrays.equals(value4, fullData4.data);
+
+ byte[] getMeta5 = pLink.get(id5, timeoutMs, true);
+ assert Arrays.equals(meta5, getMeta5);
+ byte[] getValue5 = pLink.get(id5, timeoutMs, false);
+ assert Arrays.equals(value5, getValue5);
+ byte[][] ids5 = new byte[1][];
+ ids5[0] = id5;
+ ObjectStoreLink.ObjectStoreData fullData5 = pLink.get(ids5, timeoutMs).get(0);
+ assert Arrays.equals(meta5, fullData5.metadata);
+ assert Arrays.equals(value5, fullData5.data);
+ System.out.println("Plasma java client metadata get test success.");
+
+ byte[] id6 = getArrayFilledWithValue(20, (byte) 6);
+ byte[] val6 = getArrayFilledWithValue(21, (byte) 6);
+ pLink.put(id6, val6, null);
+ assert pLink.contains(id6);
+ pLink.delete(id6);
+ assert !pLink.contains(id6);
+ System.out.println("Plasma java client delete test success.");
+
+ // Test calling shutdown while getting the object.
+ Thread thread = new Thread(() -> {
+ try {
+ TimeUnit.SECONDS.sleep(1);
+ cleanup();
+ } catch (InterruptedException e) {
+ throw new RuntimeException("Got InterruptedException when sleeping.", e);
+ }
+ });
+ thread.start();
+
+ try {
+ byte[] idNone = new byte[20];
+ Arrays.fill(idNone, (byte) 987);
+ pLink.get(idNone, timeoutMs, false);
+ Assert.fail("Fail to throw PlasmaClientException when get an object " +
+ "when object store shutdown.");
+ } catch (PlasmaClientException e) {
+ System.out.println(String.format("Expected PlasmaClientException: %s", e));
+ }
+
+ try {
+ thread.join();
+ } catch (Exception e) {
+ System.out.println(String.format("Exception caught: %s", e));
+ }
+ System.out.println("All test success.");
+
+ }
+
+ public void doByteBufferTest() {
+ System.out.println("Start ByteBuffer test.");
+ PlasmaClient client = (PlasmaClient) pLink;
+ byte[] id = new byte[20];
+ Arrays.fill(id, (byte) 10);
+ ByteBuffer buf = client.create(id, 100, null);
+ assert buf.isDirect();
+ for (int i = 0; i < 10; i++) {
+ buf.putInt(i);
+ }
+ client.seal(id);
+ client.release(id);
+ // buf is not available now.
+ assert client.contains(id);
+ System.out.println("Plasma java client create test success.");
+
+ ByteBuffer buf1 = client.getObjAsByteBuffer(id, -1, false);
+ assert buf1.limit() == 100;
+ for (int i = 0; i < 10; i++) {
+ assert buf1.getInt() == i;
+ }
+ System.out.println("Plasma java client getObjAsByteBuffer test success");
+ client.release(id);
+ }
+
+ public void doPlasmaOutOfMemoryExceptionTest() {
+ System.out.println("Start PlasmaOutOfMemoryException test.");
+ PlasmaClient client = (PlasmaClient) pLink;
+ byte[] objectId = new byte[20];
+ Arrays.fill(objectId, (byte) 1);
+ try {
+ ByteBuffer byteBuffer = client.create(objectId, 200000000, null);
+ Assert.fail("Fail to create an object, The plasma store ran out of memory.");
+ } catch (PlasmaOutOfMemoryException e) {
+ System.out.println(String.format("Expected PlasmaOutOfMemoryException: %s", e));
+ System.out.println("PlasmaOutOfMemoryException test success.");
+ }
+ }
+
+ private byte[] getArrayFilledWithValue(int arrayLength, byte val) {
+ byte[] arr = new byte[arrayLength];
+ Arrays.fill(arr, val);
+ return arr;
+ }
+
+ public String getStoreAddress() {
+ return storeSuffix + storePort;
+ }
+
+ public static void main(String[] args) throws Exception {
+
+ PlasmaClientTest plasmaClientTest = new PlasmaClientTest();
+ plasmaClientTest.doPlasmaOutOfMemoryExceptionTest();
+ plasmaClientTest.doByteBufferTest();
+ plasmaClientTest.doTest();
+ }
+
+}
diff --git a/src/arrow/java/plasma/src/test/resources/logback.xml b/src/arrow/java/plasma/src/test/resources/logback.xml
new file mode 100644
index 000000000..4c54d18a2
--- /dev/null
+++ b/src/arrow/java/plasma/src/test/resources/logback.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<configuration>
+ <statusListener class="ch.qos.logback.core.status.NopStatusListener"/>
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <!-- encoders are assigned the type
+ ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+ </encoder>
+ </appender>
+
+ <logger name="org.apache.arrow" additivity="false">
+ <level value="info" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+</configuration>
diff --git a/src/arrow/java/plasma/test.sh b/src/arrow/java/plasma/test.sh
new file mode 100755
index 000000000..dbfae646c
--- /dev/null
+++ b/src/arrow/java/plasma/test.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
+unamestr="$(uname)"
+if [[ "$unamestr" == "Linux" ]]; then
+ PARALLEL=$(nproc)
+elif [[ "$unamestr" == "Darwin" ]]; then
+ PARALLEL=$(sysctl -n hw.ncpu)
+else
+ echo "Unrecognized platform."
+ exit 1
+fi
+pushd ../../cpp
+ if [ ! -d "release" ]; then
+ mkdir release
+ fi
+ pushd release
+ cmake -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_C_FLAGS="-g -O3" \
+ -DCMAKE_CXX_FLAGS="-g -O3" \
+ -DARROW_BUILD_TESTS=off \
+ -DARROW_HDFS=on \
+ -DARROW_BOOST_USE_SHARED=on \
+ -DARROW_PYTHON=on \
+ -DARROW_PLASMA=on \
+ -DPLASMA_PYTHON=on \
+ -DARROW_JEMALLOC=off \
+ -DARROW_WITH_BROTLI=off \
+ -DARROW_WITH_LZ4=off \
+ -DARROW_WITH_ZLIB=off \
+ -DARROW_WITH_ZSTD=off \
+ -DARROW_PLASMA_JAVA_CLIENT=on \
+ ..
+ make VERBOSE=1 -j$PARALLEL
+ popd
+popd
+
+mvn clean install
+export PLASMA_STORE=$ROOT_DIR/../../cpp/release/release/plasma-store-server
+java -cp target/test-classes:target/classes -Djava.library.path=$ROOT_DIR/../../cpp/release/release/ org.apache.arrow.plasma.PlasmaClientTest
diff --git a/src/arrow/java/pom.xml b/src/arrow/java/pom.xml
new file mode 100644
index 000000000..d719e4c25
--- /dev/null
+++ b/src/arrow/java/pom.xml
@@ -0,0 +1,839 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache</groupId>
+ <artifactId>apache</artifactId>
+ <version>18</version>
+ </parent>
+
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ <packaging>pom</packaging>
+
+ <name>Apache Arrow Java Root POM</name>
+ <description>Apache Arrow is open source, in-memory columnar data structures and low-overhead messaging</description>
+ <url>https://arrow.apache.org/</url>
+
+ <properties>
+ <target.gen.source.path>${project.build.directory}/generated-sources</target.gen.source.path>
+ <dep.junit.platform.version>1.4.0</dep.junit.platform.version>
+ <dep.junit.jupiter.version>5.4.0</dep.junit.jupiter.version>
+ <dep.slf4j.version>1.7.25</dep.slf4j.version>
+ <dep.guava.version>30.1.1-jre</dep.guava.version>
+ <dep.netty.version>4.1.68.Final</dep.netty.version>
+ <dep.jackson.version>2.11.4</dep.jackson.version>
+ <dep.hadoop.version>2.7.1</dep.hadoop.version>
+ <dep.fbs.version>1.12.0</dep.fbs.version>
+ <dep.avro.version>1.10.0</dep.avro.version>
+ <arrow.vector.classifier />
+ <forkCount>2</forkCount>
+ <checkstyle.failOnViolation>true</checkstyle.failOnViolation>
+ <errorprone.javac.version>9+181-r4173-1</errorprone.javac.version>
+ </properties>
+
+ <scm>
+ <connection>scm:git:https://github.com/apache/arrow.git</connection>
+ <developerConnection>scm:git:https://github.com/apache/arrow.git</developerConnection>
+ <url>https://github.com/apache/arrow</url>
+ <tag>apache-arrow-2.0.0</tag>
+ </scm>
+
+ <mailingLists>
+ <mailingList>
+ <name>Developer List</name>
+ <subscribe>dev-subscribe@arrow.apache.org</subscribe>
+ <unsubscribe>dev-unsubscribe@arrow.apache.org</unsubscribe>
+ <post>dev@arrow.apache.org</post>
+ <archive>https://mail-archives.apache.org/mod_mbox/arrow-dev/</archive>
+ </mailingList>
+ <mailingList>
+ <name>Commits List</name>
+ <subscribe>commits-subscribe@arrow.apache.org</subscribe>
+ <unsubscribe>commits-unsubscribe@arrow.apache.org</unsubscribe>
+ <post>commits@arrow.apache.org</post>
+ <archive>https://mail-archives.apache.org/mod_mbox/arrow-commits/</archive>
+ </mailingList>
+ <mailingList>
+ <name>Issues List</name>
+ <subscribe>issues-subscribe@arrow.apache.org</subscribe>
+ <unsubscribe>issues-unsubscribe@arrow.apache.org</unsubscribe>
+ <archive>https://mail-archives.apache.org/mod_mbox/arrow-issues/</archive>
+ </mailingList>
+ </mailingLists>
+
+ <repositories>
+
+ </repositories>
+
+ <issueManagement>
+ <system>Jira</system>
+ <url>https://issues.apache.org/jira/browse/arrow</url>
+ </issueManagement>
+
+ <build>
+
+ <plugins>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>rat-checks</id>
+ <phase>validate</phase>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <excludeSubProjects>false</excludeSubProjects>
+ <excludes>
+ <exclude>**/dependency-reduced-pom.xml</exclude>
+ <exclude>**/*.log</exclude>
+ <exclude>**/*.css</exclude>
+ <exclude>**/*.js</exclude>
+ <exclude>**/*.md</exclude>
+ <exclude>**/*.eps</exclude>
+ <exclude>**/*.json</exclude>
+ <exclude>**/*.seq</exclude>
+ <exclude>**/*.parquet</exclude>
+ <exclude>**/*.sql</exclude>
+ <exclude>**/git.properties</exclude>
+ <exclude>**/*.csv</exclude>
+ <exclude>**/*.csvh</exclude>
+ <exclude>**/*.csvh-test</exclude>
+ <exclude>**/*.tsv</exclude>
+ <exclude>**/*.txt</exclude>
+ <exclude>**/*.ssv</exclude>
+ <exclude>**/arrow-*.conf</exclude>
+ <exclude>**/.buildpath</exclude>
+ <exclude>**/*.proto</exclude>
+ <exclude>**/*.fmpp</exclude>
+ <exclude>**/target/**</exclude>
+ <exclude>**/*.tdd</exclude>
+ <exclude>**/*.project</exclude>
+ <exclude>**/TAGS</exclude>
+ <exclude>**/*.checkstyle</exclude>
+ <exclude>**/.classpath</exclude>
+ <exclude>**/.factorypath</exclude>
+ <exclude>**/.settings/**</exclude>
+ <exclude>.*/**</exclude>
+ <exclude>**/*.patch</exclude>
+ <exclude>**/*.pb.cc</exclude>
+ <exclude>**/*.pb.h</exclude>
+ <exclude>**/*.linux</exclude>
+ <exclude>**/client/build/**</exclude>
+ <exclude>**/*.tbl</exclude>
+ <exclude>**/*.iml</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <exclude>**/logging.properties</exclude>
+ <exclude>**/logback-test.xml</exclude>
+ <exclude>**/logback.out.xml</exclude>
+ <exclude>**/logback.xml</exclude>
+ </excludes>
+ <archive>
+ <index>true</index>
+ <manifest>
+ <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+ <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
+ </manifest>
+ <manifestEntries>
+ <Extension-Name>org.apache.arrow</Extension-Name>
+ <Built-By>${username}</Built-By>
+ <url>https://arrow.apache.org/</url>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ <configuration>
+ <skipIfEmpty>true</skipIfEmpty>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-resources-plugin</artifactId>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>1.8</source>
+ <target>1.8</target>
+ <maxmem>2048m</maxmem>
+ <useIncrementalCompilation>false</useIncrementalCompilation>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-enforcer-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>validate_java_and_maven_version</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
+ <inherited>false</inherited>
+ <configuration>
+ <rules>
+ <requireMavenVersion>
+ <version>[3.3.0,4)</version>
+ </requireMavenVersion>
+ </rules>
+ </configuration>
+ </execution>
+ <execution>
+ <id>avoid_bad_dependencies</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
+ <configuration>
+ <rules>
+ <bannedDependencies>
+ <excludes>
+ <exclude>commons-logging</exclude>
+ <exclude>javax.servlet:servlet-api</exclude>
+ <exclude>org.mortbay.jetty:servlet-api</exclude>
+ <exclude>org.mortbay.jetty:servlet-api-2.5</exclude>
+ <exclude>log4j:log4j</exclude>
+ </excludes>
+ </bannedDependencies>
+ </rules>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>pl.project13.maven</groupId>
+ <artifactId>git-commit-id-plugin</artifactId>
+ <version>2.2.2</version>
+ <executions>
+ <execution>
+ <id>for-jars</id>
+ <inherited>true</inherited>
+ <goals>
+ <goal>revision</goal>
+ </goals>
+ <configuration>
+ <generateGitPropertiesFilename>target/classes/git.properties</generateGitPropertiesFilename>
+ </configuration>
+ </execution>
+ <execution>
+ <id>for-source-tarball</id>
+ <goals>
+ <goal>revision</goal>
+ </goals>
+ <inherited>false</inherited>
+ <configuration>
+ <generateGitPropertiesFilename>./git.properties</generateGitPropertiesFilename>
+ </configuration>
+ </execution>
+ </executions>
+
+ <configuration>
+ <dateFormat>dd.MM.yyyy '@' HH:mm:ss z</dateFormat>
+ <verbose>false</verbose>
+ <skipPoms>false</skipPoms>
+ <generateGitPropertiesFile>true</generateGitPropertiesFile>
+ <failOnNoGitDirectory>false</failOnNoGitDirectory>
+ <gitDescribe>
+ <skip>false</skip>
+ <always>false</always>
+ <abbrev>7</abbrev>
+ <dirty>-dirty</dirty>
+ <forceLongFormat>true</forceLongFormat>
+ </gitDescribe>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <version>3.1.0</version>
+ <dependencies>
+ <dependency>
+ <groupId>com.puppycrawl.tools</groupId>
+ <artifactId>checkstyle</artifactId>
+ <version>8.19</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>${dep.guava.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ <version>1.7.5</version>
+ </dependency>
+ </dependencies>
+ <executions>
+ <execution>
+ <id>validate</id>
+ <phase>validate</phase>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <configLocation>dev/checkstyle/checkstyle.xml</configLocation>
+ <headerLocation>dev/checkstyle/checkstyle.license</headerLocation>
+ <suppressionsLocation>dev/checkstyle/suppressions.xml</suppressionsLocation>
+ <includeTestSourceDirectory>true</includeTestSourceDirectory>
+ <encoding>UTF-8</encoding>
+ <consoleOutput>true</consoleOutput>
+ <failsOnError>${checkstyle.failOnViolation}</failsOnError>
+ <failOnViolation>${checkstyle.failOnViolation}</failOnViolation>
+ <violationSeverity>warning</violationSeverity>
+ <format>xml</format>
+ <format>html</format>
+ <outputFile>${project.build.directory}/test/checkstyle-errors.xml</outputFile>
+ <linkXRef>false</linkXRef>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>analyze</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>analyze-only</goal>
+ </goals>
+ <configuration>
+ <ignoreNonCompile>true</ignoreNonCompile>
+ <failOnWarning>true</failOnWarning>
+ <ignoredDependencies>
+ <!-- source annotations (not kept in compiled code) -->
+ <ignoredDependency>javax.annotation:javax.annotation-api:*</ignoredDependency>
+ </ignoredDependencies>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <version>3.0.1</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <version>0.13</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.6</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>3.6.2</version>
+ <configuration>
+ <annotationProcessorPaths>
+ <path>
+ <groupId>org.immutables</groupId>
+ <artifactId>value</artifactId>
+ <version>2.8.2</version>
+ </path>
+ </annotationProcessorPaths>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-enforcer-plugin</artifactId>
+ <version>3.0.0-M2</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>3.0.0-M3</version>
+ <configuration>
+ <enableAssertions>true</enableAssertions>
+ <childDelegation>true</childDelegation>
+ <forkCount>${forkCount}</forkCount>
+ <reuseForks>true</reuseForks>
+ <systemPropertyVariables>
+ <java.io.tmpdir>${project.build.directory}</java.io.tmpdir>
+ <io.netty.tryReflectionSetAccessible>true</io.netty.tryReflectionSetAccessible>
+ <user.timezone>UTC</user.timezone>
+ <!-- Note: changing the below configuration might increase the max allocation size for a vector
+ which in turn can cause OOM. -->
+ <arrow.vector.max_allocation_bytes>1048576</arrow.vector.max_allocation_bytes>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <version>3.0.0-M3</version>
+ <configuration>
+ <systemPropertyVariables>
+ <java.io.tmpdir>${project.build.directory}</java.io.tmpdir>
+ <io.netty.tryReflectionSetAccessible>true</io.netty.tryReflectionSetAccessible>
+ <user.timezone>UTC</user.timezone>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.jacoco</groupId>
+ <artifactId>jacoco-maven-plugin</artifactId>
+ <version>0.8.7</version>
+ </plugin>
+
+ <!--This plugin's configuration is used to store Eclipse m2e settings
+ only. It has no influence on the Maven build itself. -->
+ <plugin>
+ <groupId>org.eclipse.m2e</groupId>
+ <artifactId>lifecycle-mapping</artifactId>
+ <version>1.0.0</version>
+ <configuration>
+ <lifecycleMappingMetadata>
+ <pluginExecutions>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <versionRange>[1.6,)</versionRange>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore />
+ </action>
+ </pluginExecution>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-enforcer-plugin</artifactId>
+ <versionRange>[1.2,)</versionRange>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore />
+ </action>
+ </pluginExecution>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>
+ maven-remote-resources-plugin
+ </artifactId>
+ <versionRange>[1.1,)</versionRange>
+ <goals>
+ <goal>process</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore />
+ </action>
+ </pluginExecution>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <versionRange>[0.10,)</versionRange>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore />
+ </action>
+ </pluginExecution>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <versionRange>[0,)</versionRange>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore />
+ </action>
+ </pluginExecution>
+ </pluginExecutions>
+ </lifecycleMappingMetadata>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <groupId>com.google.flatbuffers</groupId>
+ <artifactId>flatbuffers-java</artifactId>
+ <version>${dep.fbs.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>${dep.guava.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-handler</artifactId>
+ <version>${dep.netty.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-buffer</artifactId>
+ <version>${dep.netty.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-common</artifactId>
+ <version>${dep.netty.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ <version>${dep.jackson.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>${dep.jackson.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ <version>${dep.jackson.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ <version>3.0.2</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>${dep.slf4j.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>javax.annotation</groupId>
+ <artifactId>javax.annotation-api</artifactId>
+ <version>1.3.2</version>
+ </dependency>
+ <dependency>
+ <groupId>org.immutables</groupId>
+ <artifactId>value</artifactId>
+ <version>2.8.2</version>
+ <scope>provided</scope>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
+
+ <dependencies>
+ <!-- Test Dependencies -->
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jul-to-slf4j</artifactId>
+ <version>${dep.slf4j.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ <version>${dep.slf4j.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>log4j-over-slf4j</artifactId>
+ <version>${dep.slf4j.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+
+ <dependency>
+ <groupId>org.junit.platform</groupId>
+ <artifactId>junit-platform-runner</artifactId>
+ <version>${dep.junit.platform.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-engine</artifactId>
+ <version>${dep.junit.jupiter.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-api</artifactId>
+ <version>${dep.junit.jupiter.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.vintage</groupId>
+ <artifactId>junit-vintage-engine</artifactId>
+ <version>${dep.junit.jupiter.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <!-- Use to keep older tests running -->
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.12</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-junit-jupiter</artifactId>
+ <version>2.25.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ <version>1.2.3</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>de.huxhorn.lilith</groupId>
+ <artifactId>de.huxhorn.lilith.logback.appender.multiplex-classic</artifactId>
+ <version>0.9.44</version>
+ <scope>test</scope>
+ </dependency>
+
+ </dependencies>
+
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>3.0.0-M1</version>
+ <reportSets>
+ <reportSet><!-- by default, id = "default" -->
+ <reports><!-- select non-aggregate reports -->
+ <report>javadoc</report>
+ <report>test-javadoc</report>
+ </reports>
+ </reportSet>
+ <reportSet><!-- aggregate reportSet, to define in poms having modules -->
+ <id>aggregate</id>
+ <inherited>false</inherited><!-- don't run aggregate in child modules -->
+ <reports>
+ <report>aggregate</report>
+ </reports>
+ </reportSet>
+ </reportSets>
+ </plugin>
+ </plugins>
+ </reporting>
+
+ <modules>
+ <module>format</module>
+ <module>memory</module>
+ <module>vector</module>
+ <module>tools</module>
+ <module>adapter/jdbc</module>
+ <module>plasma</module>
+ <module>flight/flight-core</module>
+ <module>flight/flight-grpc</module>
+ <module>performance</module>
+ <module>algorithm</module>
+ <module>adapter/avro</module>
+ <module>compression</module>
+ </modules>
+
+ <profiles>
+ <profile>
+ <id>java-8</id>
+ <activation>
+ <jdk>[1.8,)</jdk>
+ </activation>
+ <properties>
+ <doclint>none</doclint>
+ <additionalparam>-Xdoclint:none</additionalparam>
+ </properties>
+ </profile>
+
+ <profile>
+ <!-- C data interface depends on building a native library -->
+ <id>arrow-c-data</id>
+ <modules>
+ <module>c</module>
+ </modules>
+ </profile>
+
+ <profile>
+ <!-- orc java depends on arrow cpp, and arrow cpp isn't enabled by default yet -->
+ <id>arrow-jni</id>
+ <modules>
+ <!-- these have dependency on cpp -->
+ <module>adapter/orc</module>
+ <module>gandiva</module>
+ <module>dataset</module>
+ </modules>
+ </profile>
+
+ <profile>
+ <!-- Use the version of arrow-vector that shades flatbuffers and packages format -->
+ <id>shade-flatbuffers</id>
+ <properties>
+ <arrow.vector.classifier>shade-format-flatbuffers</arrow.vector.classifier>
+ </properties>
+ </profile>
+
+ <profile>
+ <id>error-prone</id>
+ <!--
+ Do not activate Error Prone while running with Eclipse/M2E as it causes incompatibilities
+ with other annotation processors.
+ See https://github.com/jbosstools/m2e-apt/issues/62 for details
+ -->
+ <activation>
+ <property>
+ <name>!m2e.version</name>
+ </property>
+ </activation>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <compilerArgs combine.children="append">
+ <arg>-XDcompilePolicy=simple</arg>
+ <arg>-Xplugin:ErrorProne</arg>
+ </compilerArgs>
+ <annotationProcessorPaths combine.children="append">
+ <path>
+ <groupId>com.google.errorprone</groupId>
+ <artifactId>error_prone_core</artifactId>
+ <version>2.4.0</version>
+ </path>
+ </annotationProcessorPaths>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+
+ <profile>
+ <id>error-prone-jdk8</id>
+ <!-- using github.com/google/error-prone-javac is required when running on JDK 8 -->
+ <activation>
+ <jdk>1.8</jdk>
+ <property>
+ <name>!m2e.version</name>
+ </property>
+ </activation>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <fork>true</fork>
+ <compilerArgs combine.children="append">
+ <arg>-J-Xbootclasspath/p:${settings.localRepository}/com/google/errorprone/javac/${errorprone.javac.version}/javac-${errorprone.javac.version}.jar</arg>
+ </compilerArgs>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+
+ <profile>
+ <id>code-coverage</id>
+ <!--
+ Use `mvn -Pcode-coverage install site` to capture and generate the code coverage report
+ Alternatively, if you do not want to generate the full set of reports, use:
+ `mvn -Pcode-coverage install org.jacoco:jacoco-maven-plugin:report org.jacoco:jacoco-maven-plugin:report-integration`
+
+ Add `-Pintegration-tests` to the previous commandline to also capture integration tests coverage
+ -->
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.jacoco</groupId>
+ <artifactId>jacoco-maven-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>default-prepare-agent</id>
+ <goals>
+ <goal>prepare-agent</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>default-prepare-agent-integration</id>
+ <goals>
+ <goal>prepare-agent-integration</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.jacoco</groupId>
+ <artifactId>jacoco-maven-plugin</artifactId>
+ <version>0.8.7</version>
+ <reportSets>
+ <reportSet>
+ <inherited>false</inherited><!-- don't run aggregate in child modules -->
+ <reports>
+ <!-- select non-aggregate reports -->
+ <report>report</report>
+ </reports>
+ </reportSet>
+ </reportSets>
+ </plugin>
+ </plugins>
+ </reporting>
+ </profile>
+
+ </profiles>
+
+</project>
diff --git a/src/arrow/java/tools/pom.xml b/src/arrow/java/tools/pom.xml
new file mode 100644
index 000000000..12a19ae2e
--- /dev/null
+++ b/src/arrow/java/tools/pom.xml
@@ -0,0 +1,106 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ </parent>
+ <artifactId>arrow-tools</artifactId>
+ <name>Arrow Tools</name>
+ <description>Java applications for working with Arrow ValueVectors.</description>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>${arrow.vector.classifier}</classifier>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-compression</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.4</version>
+ </dependency>
+ <dependency>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ <version>1.2.3</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>tests</classifier>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>3.0.0</version>
+ <configuration>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-assembly</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
diff --git a/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java
new file mode 100644
index 000000000..0ddd1e946
--- /dev/null
+++ b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.tools;
+
+import java.io.IOException;
+import java.net.ServerSocket;
+import java.net.Socket;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Simple server that echoes back data received.
+ */
+public class EchoServer {
+ private static final Logger LOGGER = LoggerFactory.getLogger(EchoServer.class);
+ private final ServerSocket serverSocket;
+ private boolean closed = false;
+
+ /**
+ * Constructs a new instance that binds to the given port.
+ */
+ public EchoServer(int port) throws IOException {
+ LOGGER.debug("Starting echo server.");
+ serverSocket = new ServerSocket(port);
+ LOGGER.debug("Running echo server on port: " + port());
+ }
+
+ /**
+ * Main method to run the server, the first argument is an optional port number.
+ */
+ public static void main(String[] args) throws Exception {
+ int port;
+ if (args.length > 0) {
+ port = Integer.parseInt(args[0]);
+ } else {
+ port = 8080;
+ }
+ new EchoServer(port).run();
+ }
+
+ public int port() {
+ return serverSocket.getLocalPort();
+ }
+
+ /**
+ * Starts the main server event loop.
+ */
+ public void run() throws IOException {
+ try {
+ Socket clientSocket = null;
+ ClientConnection client = null;
+ while (!closed) {
+ LOGGER.debug("Waiting to accept new client connection.");
+ clientSocket = serverSocket.accept();
+ LOGGER.debug("Accepted new client connection.");
+ client = new ClientConnection(clientSocket);
+ try {
+ client.run();
+ } catch (IOException e) {
+ LOGGER.warn("Error handling client connection.", e);
+ }
+ LOGGER.debug("Closed connection with client");
+ }
+ } catch (java.net.SocketException ex) {
+ if (!closed) {
+ throw ex;
+ }
+ } finally {
+ serverSocket.close();
+ LOGGER.debug("Server closed.");
+ }
+ }
+
+ public void close() throws IOException {
+ closed = true;
+ serverSocket.close();
+ }
+
+ /**
+ * Handler for each client connection to the server.
+ */
+ public static class ClientConnection implements AutoCloseable {
+ public final Socket socket;
+
+ public ClientConnection(Socket socket) {
+ this.socket = socket;
+ }
+
+ /**
+ * Reads a record batch off the socket and writes it back out.
+ */
+ public void run() throws IOException {
+ // Read the entire input stream and write it back
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
+ ArrowStreamReader reader = new ArrowStreamReader(socket.getInputStream(), allocator);
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+ // load the first batch before instantiating the writer so that we have any dictionaries
+ reader.loadNextBatch();
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, reader, socket.getOutputStream());
+ writer.start();
+ int echoed = 0;
+ while (true) {
+ int rowCount = reader.getVectorSchemaRoot().getRowCount();
+ if (rowCount == 0) {
+ break;
+ } else {
+ writer.writeBatch();
+ echoed += rowCount;
+ reader.loadNextBatch();
+ }
+ }
+ writer.end();
+ Preconditions.checkState(reader.bytesRead() == writer.bytesWritten());
+ LOGGER.debug(String.format("Echoed %d records", echoed));
+ reader.close(false);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ socket.close();
+ }
+ }
+}
diff --git a/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java
new file mode 100644
index 000000000..c49b04c85
--- /dev/null
+++ b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.tools;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Application that verifies data can be round-tripped through a file.
+ */
+public class FileRoundtrip {
+ private static final Logger LOGGER = LoggerFactory.getLogger(FileRoundtrip.class);
+ private final Options options;
+ private final PrintStream out;
+ private final PrintStream err;
+
+ FileRoundtrip(PrintStream out, PrintStream err) {
+ this.out = out;
+ this.err = err;
+ this.options = new Options();
+ this.options.addOption("i", "in", true, "input file");
+ this.options.addOption("o", "out", true, "output file");
+
+ }
+
+ public static void main(String[] args) {
+ System.exit(new FileRoundtrip(System.out, System.err).run(args));
+ }
+
+ private File validateFile(String type, String fileName) {
+ if (fileName == null) {
+ throw new IllegalArgumentException("missing " + type + " file parameter");
+ }
+ File f = new File(fileName);
+ if (!f.exists() || f.isDirectory()) {
+ throw new IllegalArgumentException(type + " file not found: " + f.getAbsolutePath());
+ }
+ return f;
+ }
+
+ int run(String[] args) {
+ try {
+ CommandLineParser parser = new PosixParser();
+ CommandLine cmd = parser.parse(options, args, false);
+
+ String inFileName = cmd.getOptionValue("in");
+ String outFileName = cmd.getOptionValue("out");
+
+ File inFile = validateFile("input", inFileName);
+ File outFile = validateFile("output", outFileName);
+
+ try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ FileInputStream fileInputStream = new FileInputStream(inFile);
+ ArrowFileReader arrowReader = new ArrowFileReader(fileInputStream.getChannel(),
+ allocator)) {
+
+ VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
+ Schema schema = root.getSchema();
+ LOGGER.debug("Input file size: " + inFile.length());
+ LOGGER.debug("Found schema: " + schema);
+
+ try (FileOutputStream fileOutputStream = new FileOutputStream(outFile);
+ ArrowFileWriter arrowWriter = new ArrowFileWriter(root, arrowReader,
+ fileOutputStream.getChannel())) {
+ arrowWriter.start();
+ while (true) {
+ if (!arrowReader.loadNextBatch()) {
+ break;
+ } else {
+ arrowWriter.writeBatch();
+ }
+ }
+ arrowWriter.end();
+ }
+ LOGGER.debug("Output file size: " + outFile.length());
+ }
+ } catch (ParseException e) {
+ return fatalError("Invalid parameters", e);
+ } catch (IOException e) {
+ return fatalError("Error accessing files", e);
+ }
+ return 0;
+ }
+
+ private int fatalError(String message, Throwable e) {
+ err.println(message);
+ LOGGER.error(message, e);
+ return 1;
+ }
+
+}
diff --git a/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java
new file mode 100644
index 000000000..bb7cedeb7
--- /dev/null
+++ b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.tools;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+
+/**
+ * Converts an Arrow file to an Arrow stream. The file should be specified as the
+ * first argument and the output is written to standard out.
+ */
+public class FileToStream {
+ private FileToStream() {}
+
+ /**
+ * Reads an Arrow file from in and writes it back to out.
+ */
+ public static void convert(FileInputStream in, OutputStream out) throws IOException {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ try (ArrowFileReader reader = new ArrowFileReader(in.getChannel(), allocator)) {
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+ // load the first batch before instantiating the writer so that we have any dictionaries
+ // only writeBatches if we loaded one in the first place.
+ boolean writeBatches = reader.loadNextBatch();
+ try (ArrowStreamWriter writer = new ArrowStreamWriter(root, reader, out)) {
+ writer.start();
+ while (writeBatches) {
+ writer.writeBatch();
+ if (!reader.loadNextBatch()) {
+ break;
+ }
+ }
+ writer.end();
+ }
+ }
+ }
+
+ /**
+ * Main method. The first arg is the file path. The second, optional argument,
+ * is an output file location (defaults to standard out).
+ */
+ public static void main(String[] args) throws IOException {
+ if (args.length != 1 && args.length != 2) {
+ System.err.println("Usage: FileToStream <input file> [output file]");
+ System.exit(1);
+ }
+
+ FileInputStream in = new FileInputStream(new File(args[0]));
+ OutputStream out = args.length == 1 ?
+ System.out : new FileOutputStream(new File(args[1]));
+
+ convert(in, out);
+ }
+}
diff --git a/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/Integration.java b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/Integration.java
new file mode 100644
index 000000000..1db3eeb64
--- /dev/null
+++ b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/Integration.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.tools;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.compression.CommonsCompressionFactory;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.JsonFileReader;
+import org.apache.arrow.vector.ipc.JsonFileWriter;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.Validator;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Application for cross language integration testing.
+ */
+public class Integration {
+ private static final Logger LOGGER = LoggerFactory.getLogger(Integration.class);
+ private final Options options;
+
+ Integration() {
+ this.options = new Options();
+ this.options.addOption("a", "arrow", true, "arrow file");
+ this.options.addOption("j", "json", true, "json file");
+ this.options.addOption("c", "command", true, "command to execute: " + Arrays.toString(Command
+ .values()));
+ }
+
+ /**
+ * Main method.
+ */
+ public static void main(String[] args) {
+ try {
+ new Integration().run(args);
+ } catch (ParseException e) {
+ fatalError("Invalid parameters", e);
+ } catch (IOException e) {
+ fatalError("Error accessing files", e);
+ } catch (RuntimeException e) {
+ fatalError("Incompatible files", e);
+ }
+ }
+
+ private static void fatalError(String message, Throwable e) {
+ System.err.println(message);
+ System.err.println(e.getMessage());
+ LOGGER.error(message, e);
+ System.exit(1);
+ }
+
+ private File validateFile(String type, String fileName, boolean shouldExist) {
+ if (fileName == null) {
+ throw new IllegalArgumentException("missing " + type + " file parameter");
+ }
+ File f = new File(fileName);
+ if (shouldExist && (!f.exists() || f.isDirectory())) {
+ throw new IllegalArgumentException(type + " file not found: " + f.getAbsolutePath());
+ }
+ if (!shouldExist && f.exists()) {
+ throw new IllegalArgumentException(type + " file already exists: " + f.getAbsolutePath());
+ }
+ return f;
+ }
+
+ static void extractDictionaryEncodings(List<Field> fields, List<DictionaryEncoding> encodings) {
+ for (Field field : fields) {
+ DictionaryEncoding encoding = field.getDictionary();
+ if (encoding != null) {
+ encodings.add(encoding);
+ }
+
+ extractDictionaryEncodings(field.getChildren(), encodings);
+ }
+ }
+
+ void run(String[] args) throws ParseException, IOException {
+ CommandLineParser parser = new PosixParser();
+ CommandLine cmd = parser.parse(options, args, false);
+
+ Command command = toCommand(cmd.getOptionValue("command"));
+ File arrowFile = validateFile("arrow", cmd.getOptionValue("arrow"), command.arrowExists);
+ File jsonFile = validateFile("json", cmd.getOptionValue("json"), command.jsonExists);
+ command.execute(arrowFile, jsonFile);
+ }
+
+ private Command toCommand(String commandName) {
+ try {
+ return Command.valueOf(commandName);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("Unknown command: " + commandName + " expected one of " +
+ Arrays.toString(Command.values()));
+ }
+ }
+
+ /**
+ * Commands (actions) the application can perform.
+ */
+ enum Command {
+ ARROW_TO_JSON(true, false) {
+ @Override
+ public void execute(File arrowFile, File jsonFile) throws IOException {
+ try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ FileInputStream fileInputStream = new FileInputStream(arrowFile);
+ ArrowFileReader arrowReader = new ArrowFileReader(fileInputStream.getChannel(),
+ allocator)) {
+ VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
+ Schema schema = root.getSchema();
+ LOGGER.debug("Input file size: " + arrowFile.length());
+ LOGGER.debug("Found schema: " + schema);
+ try (JsonFileWriter writer = new JsonFileWriter(jsonFile, JsonFileWriter.config()
+ .pretty(true))) {
+ writer.start(schema, arrowReader);
+ for (ArrowBlock rbBlock : arrowReader.getRecordBlocks()) {
+ if (!arrowReader.loadRecordBatch(rbBlock)) {
+ throw new IOException("Expected to load record batch");
+ }
+ writer.write(root);
+ }
+ }
+ LOGGER.debug("Output file size: " + jsonFile.length());
+ }
+ }
+ },
+ JSON_TO_ARROW(false, true) {
+ @Override
+ public void execute(File arrowFile, File jsonFile) throws IOException {
+ try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(jsonFile, allocator)) {
+ Schema schema = reader.start();
+ LOGGER.debug("Input file size: " + jsonFile.length());
+ LOGGER.debug("Found schema: " + schema);
+ try (FileOutputStream fileOutputStream = new FileOutputStream(arrowFile);
+ VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
+ // TODO json dictionaries
+ ArrowFileWriter arrowWriter = new ArrowFileWriter(root, reader, fileOutputStream
+ .getChannel())) {
+ arrowWriter.start();
+ while (reader.read(root)) {
+ arrowWriter.writeBatch();
+ }
+ arrowWriter.end();
+ }
+ LOGGER.debug("Output file size: " + arrowFile.length());
+ }
+ }
+ },
+ VALIDATE(true, true) {
+ @Override
+ public void execute(File arrowFile, File jsonFile) throws IOException {
+ try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ JsonFileReader jsonReader = new JsonFileReader(jsonFile, allocator);
+ FileInputStream fileInputStream = new FileInputStream(arrowFile);
+ ArrowFileReader arrowReader = new ArrowFileReader(fileInputStream.getChannel(),
+ allocator, CommonsCompressionFactory.INSTANCE)) {
+ Schema jsonSchema = jsonReader.start();
+ VectorSchemaRoot arrowRoot = arrowReader.getVectorSchemaRoot();
+ Schema arrowSchema = arrowRoot.getSchema();
+ LOGGER.debug("Arrow Input file size: " + arrowFile.length());
+ LOGGER.debug("ARROW schema: " + arrowSchema);
+ LOGGER.debug("JSON Input file size: " + jsonFile.length());
+ LOGGER.debug("JSON schema: " + jsonSchema);
+ Validator.compareSchemas(jsonSchema, arrowSchema);
+
+ List<ArrowBlock> recordBatches = arrowReader.getRecordBlocks();
+ Iterator<ArrowBlock> iterator = recordBatches.iterator();
+ VectorSchemaRoot jsonRoot;
+ int totalBatches = 0;
+ while ((jsonRoot = jsonReader.read()) != null && iterator.hasNext()) {
+ ArrowBlock rbBlock = iterator.next();
+ if (!arrowReader.loadRecordBatch(rbBlock)) {
+ throw new IOException("Expected to load record batch");
+ }
+ Validator.compareVectorSchemaRoot(arrowRoot, jsonRoot);
+ jsonRoot.close();
+ totalBatches++;
+ }
+
+ // Validate Dictionaries after ArrowFileReader has read batches
+ List<DictionaryEncoding> encodingsJson = new ArrayList<>();
+ extractDictionaryEncodings(jsonSchema.getFields(), encodingsJson);
+ List<DictionaryEncoding> encodingsArrow = new ArrayList<>();
+ extractDictionaryEncodings(arrowSchema.getFields(), encodingsArrow);
+ Validator.compareDictionaries(encodingsJson, encodingsArrow, jsonReader, arrowReader);
+
+ boolean hasMoreJSON = jsonRoot != null;
+ boolean hasMoreArrow = iterator.hasNext();
+ if (hasMoreJSON || hasMoreArrow) {
+ throw new IllegalArgumentException("Unexpected RecordBatches. Total: " + totalBatches +
+ " J:" + hasMoreJSON + " " +
+ "A:" + hasMoreArrow);
+ }
+ }
+ }
+ };
+
+ public final boolean arrowExists;
+ public final boolean jsonExists;
+
+ Command(boolean arrowExists, boolean jsonExists) {
+ this.arrowExists = arrowExists;
+ this.jsonExists = jsonExists;
+ }
+
+ public abstract void execute(File arrowFile, File jsonFile) throws IOException;
+
+ }
+
+}
diff --git a/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/StreamToFile.java b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/StreamToFile.java
new file mode 100644
index 000000000..6bd3c2fba
--- /dev/null
+++ b/src/arrow/java/tools/src/main/java/org/apache/arrow/tools/StreamToFile.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.tools;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.channels.Channels;
+
+import org.apache.arrow.compression.CommonsCompressionFactory;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+
+/**
+ * Converts an Arrow stream to an Arrow file.
+ */
+public class StreamToFile {
+ /**
+ * Reads an Arrow stream from <code>in</code> and writes it to <code>out</code>.
+ */
+ public static void convert(InputStream in, OutputStream out) throws IOException {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator, CommonsCompressionFactory.INSTANCE)) {
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+ // load the first batch before instantiating the writer so that we have any dictionaries.
+ // Only writeBatches if we load the first one.
+ boolean writeBatches = reader.loadNextBatch();
+ try (ArrowFileWriter writer = new ArrowFileWriter(root, reader, Channels.newChannel(out))) {
+ writer.start();
+ while (writeBatches) {
+ writer.writeBatch();
+ if (!reader.loadNextBatch()) {
+ break;
+ }
+ }
+ writer.end();
+ }
+ }
+ }
+
+ /**
+ * Main method. Defaults to reading from standard in and standard out.
+ * If there are two arguments the first is interpreted as the input file path,
+ * the second is the output file path.
+ */
+ public static void main(String[] args) throws IOException {
+ InputStream in = System.in;
+ OutputStream out = System.out;
+ if (args.length == 2) {
+ in = new FileInputStream(new File(args[0]));
+ out = new FileOutputStream(new File(args[1]));
+ }
+ convert(in, out);
+ }
+}
diff --git a/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java b/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java
new file mode 100644
index 000000000..178a0834f
--- /dev/null
+++ b/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.tools;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+
+public class ArrowFileTestFixtures {
+ static final int COUNT = 10;
+
+ static void writeData(int count, NonNullableStructVector parent) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ for (int i = 0; i < count; i++) {
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ }
+ writer.setValueCount(count);
+ }
+
+ static void validateOutput(File testOutFile, BufferAllocator allocator) throws Exception {
+ // read
+ try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer
+ .MAX_VALUE);
+ FileInputStream fileInputStream = new FileInputStream(testOutFile);
+ ArrowFileReader arrowReader = new ArrowFileReader(fileInputStream.getChannel(),
+ readerAllocator)) {
+ VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
+ Schema schema = root.getSchema();
+ for (ArrowBlock rbBlock : arrowReader.getRecordBlocks()) {
+ if (!arrowReader.loadRecordBatch(rbBlock)) {
+ throw new IOException("Expected to read record batch");
+ }
+ validateContent(COUNT, root);
+ }
+ }
+ }
+
+ static void validateContent(int count, VectorSchemaRoot root) {
+ Assert.assertEquals(count, root.getRowCount());
+ for (int i = 0; i < count; i++) {
+ Assert.assertEquals(i, root.getVector("int").getObject(i));
+ Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
+ }
+ }
+
+ static void write(FieldVector parent, File file) throws FileNotFoundException, IOException {
+ VectorSchemaRoot root = new VectorSchemaRoot(parent);
+ try (FileOutputStream fileOutputStream = new FileOutputStream(file);
+ ArrowFileWriter arrowWriter = new ArrowFileWriter(root, null, fileOutputStream
+ .getChannel())) {
+ arrowWriter.writeBatch();
+ }
+ }
+
+
+ static void writeInput(File testInFile, BufferAllocator allocator) throws
+ FileNotFoundException, IOException {
+ int count = ArrowFileTestFixtures.COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0,
+ Integer.MAX_VALUE);
+ NonNullableStructVector parent = NonNullableStructVector.empty("parent", vectorAllocator)) {
+ writeData(count, parent);
+ write(parent.getChild("root"), testInFile);
+ }
+ }
+}
diff --git a/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java b/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java
new file mode 100644
index 000000000..714cb416b
--- /dev/null
+++ b/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.tools;
+
+import static java.util.Arrays.asList;
+import static org.apache.arrow.vector.types.Types.MinorType.TINYINT;
+import static org.apache.arrow.vector.types.Types.MinorType.VARCHAR;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.net.Socket;
+import java.net.UnknownHostException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.Text;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+
+public class EchoServerTest {
+
+ private static EchoServer server;
+ private static int serverPort;
+ private static Thread serverThread;
+
+ @BeforeClass
+ public static void startEchoServer() throws IOException {
+ server = new EchoServer(0);
+ serverPort = server.port();
+ serverThread = new Thread() {
+ @Override
+ public void run() {
+ try {
+ server.run();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ };
+ serverThread.start();
+ }
+
+ @AfterClass
+ public static void stopEchoServer() throws IOException, InterruptedException {
+ server.close();
+ serverThread.join();
+ }
+
+ private void testEchoServer(int serverPort,
+ Field field,
+ TinyIntVector vector,
+ int batches)
+ throws UnknownHostException, IOException {
+ VectorSchemaRoot root = new VectorSchemaRoot(asList(field), asList((FieldVector) vector), 0);
+ try (BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+ Socket socket = new Socket("localhost", serverPort);
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, null, socket.getOutputStream());
+ ArrowStreamReader reader = new ArrowStreamReader(socket.getInputStream(), alloc)) {
+ writer.start();
+ for (int i = 0; i < batches; i++) {
+ vector.allocateNew(16);
+ for (int j = 0; j < 8; j++) {
+ vector.set(j, j + i);
+ vector.set(j + 8, 0, (byte) (j + i));
+ }
+ vector.setValueCount(16);
+ root.setRowCount(16);
+ writer.writeBatch();
+ }
+ writer.end();
+
+ assertEquals(new Schema(asList(field)), reader.getVectorSchemaRoot().getSchema());
+
+ TinyIntVector readVector = (TinyIntVector) reader.getVectorSchemaRoot()
+ .getFieldVectors().get(0);
+ for (int i = 0; i < batches; i++) {
+ Assert.assertTrue(reader.loadNextBatch());
+ assertEquals(16, reader.getVectorSchemaRoot().getRowCount());
+ assertEquals(16, readVector.getValueCount());
+ for (int j = 0; j < 8; j++) {
+ assertEquals(j + i, readVector.get(j));
+ assertTrue(readVector.isNull(j + 8));
+ }
+ }
+ Assert.assertFalse(reader.loadNextBatch());
+ assertEquals(0, reader.getVectorSchemaRoot().getRowCount());
+ assertEquals(reader.bytesRead(), writer.bytesWritten());
+ }
+ }
+
+ @Test
+ public void basicTest() throws InterruptedException, IOException {
+ BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+
+ Field field = new Field(
+ "testField",
+ new FieldType(true, new ArrowType.Int(8, true), null, null),
+ Collections.<Field>emptyList());
+ TinyIntVector vector =
+ new TinyIntVector("testField", FieldType.nullable(TINYINT.getType()), alloc);
+ Schema schema = new Schema(asList(field));
+
+ // Try an empty stream, just the header.
+ testEchoServer(serverPort, field, vector, 0);
+
+ // Try with one batch.
+ testEchoServer(serverPort, field, vector, 1);
+
+ // Try with a few
+ testEchoServer(serverPort, field, vector, 10);
+ }
+
+ @Test
+ public void testFlatDictionary() throws IOException {
+ DictionaryEncoding writeEncoding = new DictionaryEncoding(1L, false, null);
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ IntVector writeVector =
+ new IntVector(
+ "varchar",
+ new FieldType(true, MinorType.INT.getType(), writeEncoding, null),
+ allocator);
+ VarCharVector writeDictionaryVector =
+ new VarCharVector(
+ "dict",
+ FieldType.nullable(VARCHAR.getType()),
+ allocator)) {
+
+ ValueVectorDataPopulator.setVector(writeVector, 0, 1, null, 2, 1, 2);
+ ValueVectorDataPopulator.setVector(writeDictionaryVector, "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8), "baz".getBytes(StandardCharsets.UTF_8));
+
+ List<Field> fields = ImmutableList.of(writeVector.getField());
+ List<FieldVector> vectors = ImmutableList.of((FieldVector) writeVector);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 6);
+
+ DictionaryProvider writeProvider = new MapDictionaryProvider(
+ new Dictionary(writeDictionaryVector, writeEncoding));
+
+ try (Socket socket = new Socket("localhost", serverPort);
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, writeProvider, socket
+ .getOutputStream());
+ ArrowStreamReader reader = new ArrowStreamReader(socket.getInputStream(), allocator)) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ reader.loadNextBatch();
+ VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
+ Assert.assertEquals(6, readerRoot.getRowCount());
+
+ FieldVector readVector = readerRoot.getFieldVectors().get(0);
+ Assert.assertNotNull(readVector);
+
+ DictionaryEncoding readEncoding = readVector.getField().getDictionary();
+ Assert.assertNotNull(readEncoding);
+ Assert.assertEquals(1L, readEncoding.getId());
+
+ Assert.assertEquals(6, readVector.getValueCount());
+ Assert.assertEquals(0, readVector.getObject(0));
+ Assert.assertEquals(1, readVector.getObject(1));
+ Assert.assertEquals(null, readVector.getObject(2));
+ Assert.assertEquals(2, readVector.getObject(3));
+ Assert.assertEquals(1, readVector.getObject(4));
+ Assert.assertEquals(2, readVector.getObject(5));
+
+ Dictionary dictionary = reader.lookup(1L);
+ Assert.assertNotNull(dictionary);
+ VarCharVector dictionaryVector = ((VarCharVector) dictionary.getVector());
+ Assert.assertEquals(3, dictionaryVector.getValueCount());
+ Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
+ Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
+ Assert.assertEquals(new Text("baz"), dictionaryVector.getObject(2));
+ }
+ }
+ }
+
+ @Test
+ public void testNestedDictionary() throws IOException {
+ DictionaryEncoding writeEncoding = new DictionaryEncoding(2L, false, null);
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ VarCharVector writeDictionaryVector =
+ new VarCharVector("dictionary", FieldType.nullable(VARCHAR.getType()), allocator);
+ ListVector writeVector = ListVector.empty("list", allocator)) {
+
+ // data being written:
+ // [['foo', 'bar'], ['foo'], ['bar']] -> [[0, 1], [0], [1]]
+
+ writeDictionaryVector.allocateNew();
+ writeDictionaryVector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ writeDictionaryVector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ writeDictionaryVector.setValueCount(2);
+
+ writeVector.addOrGetVector(new FieldType(true, MinorType.INT.getType(), writeEncoding, null));
+ writeVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(writeVector);
+ listWriter.startList();
+ listWriter.writeInt(0);
+ listWriter.writeInt(1);
+ listWriter.endList();
+ listWriter.startList();
+ listWriter.writeInt(0);
+ listWriter.endList();
+ listWriter.startList();
+ listWriter.writeInt(1);
+ listWriter.endList();
+ listWriter.setValueCount(3);
+
+ List<Field> fields = ImmutableList.of(writeVector.getField());
+ List<FieldVector> vectors = ImmutableList.of((FieldVector) writeVector);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 3);
+
+ DictionaryProvider writeProvider = new MapDictionaryProvider(
+ new Dictionary(writeDictionaryVector, writeEncoding));
+
+ try (Socket socket = new Socket("localhost", serverPort);
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, writeProvider, socket
+ .getOutputStream());
+ ArrowStreamReader reader = new ArrowStreamReader(socket.getInputStream(), allocator)) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ reader.loadNextBatch();
+ VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
+ Assert.assertEquals(3, readerRoot.getRowCount());
+
+ ListVector readVector = (ListVector) readerRoot.getFieldVectors().get(0);
+ Assert.assertNotNull(readVector);
+
+ Assert.assertNull(readVector.getField().getDictionary());
+ DictionaryEncoding readEncoding = readVector.getField().getChildren().get(0)
+ .getDictionary();
+ Assert.assertNotNull(readEncoding);
+ Assert.assertEquals(2L, readEncoding.getId());
+
+ Field nestedField = readVector.getField().getChildren().get(0);
+
+ DictionaryEncoding encoding = nestedField.getDictionary();
+ Assert.assertNotNull(encoding);
+ Assert.assertEquals(2L, encoding.getId());
+ Assert.assertEquals(new Int(32, true), encoding.getIndexType());
+
+ Assert.assertEquals(3, readVector.getValueCount());
+ Assert.assertEquals(Arrays.asList(0, 1), readVector.getObject(0));
+ Assert.assertEquals(Arrays.asList(0), readVector.getObject(1));
+ Assert.assertEquals(Arrays.asList(1), readVector.getObject(2));
+
+ Dictionary readDictionary = reader.lookup(2L);
+ Assert.assertNotNull(readDictionary);
+ VarCharVector dictionaryVector = ((VarCharVector) readDictionary.getVector());
+ Assert.assertEquals(2, dictionaryVector.getValueCount());
+ Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
+ Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java b/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java
new file mode 100644
index 000000000..ddac6f793
--- /dev/null
+++ b/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.tools;
+
+import static org.apache.arrow.tools.ArrowFileTestFixtures.validateOutput;
+import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput;
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class TestFileRoundtrip {
+
+ @Rule
+ public TemporaryFolder testFolder = new TemporaryFolder();
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void tearDown() {
+ allocator.close();
+ }
+
+ @Test
+ public void test() throws Exception {
+ File testInFile = testFolder.newFile("testIn.arrow");
+ File testOutFile = testFolder.newFile("testOut.arrow");
+
+ writeInput(testInFile, allocator);
+
+ String[] args = {"-i", testInFile.getAbsolutePath(), "-o", testOutFile.getAbsolutePath()};
+ int result = new FileRoundtrip(System.out, System.err).run(args);
+ assertEquals(0, result);
+
+ validateOutput(testOutFile, allocator);
+ }
+
+}
diff --git a/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java b/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
new file mode 100644
index 000000000..1232c6c1d
--- /dev/null
+++ b/src/arrow/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.tools;
+
+import static org.apache.arrow.tools.ArrowFileTestFixtures.validateOutput;
+import static org.apache.arrow.tools.ArrowFileTestFixtures.write;
+import static org.apache.arrow.tools.ArrowFileTestFixtures.writeData;
+import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.tools.Integration.Command;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.Float8Writer;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+
+public class TestIntegration {
+
+ @Rule
+ public TemporaryFolder testFolder = new TemporaryFolder();
+
+ private BufferAllocator allocator;
+ private ObjectMapper om = new ObjectMapper();
+
+ {
+ DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
+ prettyPrinter.indentArraysWith(NopIndenter.instance);
+ om.setDefaultPrettyPrinter(prettyPrinter);
+ om.enable(SerializationFeature.INDENT_OUTPUT);
+ om.enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
+ }
+
+ static void writeInputFloat(File testInFile, BufferAllocator allocator, double... f) throws
+ FileNotFoundException, IOException {
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0,
+ Integer.MAX_VALUE);
+ NonNullableStructVector parent = NonNullableStructVector.empty("parent", vectorAllocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ Float8Writer floatWriter = rootWriter.float8("float");
+ for (int i = 0; i < f.length; i++) {
+ floatWriter.setPosition(i);
+ floatWriter.writeFloat8(f[i]);
+ }
+ writer.setValueCount(f.length);
+ write(parent.getChild("root"), testInFile);
+ }
+ }
+
+ static void writeInput2(File testInFile, BufferAllocator allocator) throws
+ FileNotFoundException, IOException {
+ int count = ArrowFileTestFixtures.COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0,
+ Integer.MAX_VALUE);
+ NonNullableStructVector parent = NonNullableStructVector.empty("parent", vectorAllocator)) {
+ writeData(count, parent);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ intWriter.setPosition(5);
+ intWriter.writeInt(999);
+ bigIntWriter.setPosition(4);
+ bigIntWriter.writeBigInt(777L);
+ writer.setValueCount(count);
+ write(parent.getChild("root"), testInFile);
+ }
+ }
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void tearDown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testValid() throws Exception {
+ File testInFile = testFolder.newFile("testIn.arrow");
+ File testJSONFile = testFolder.newFile("testOut.json");
+ testJSONFile.delete();
+ File testOutFile = testFolder.newFile("testOut.arrow");
+ testOutFile.delete();
+
+ // generate an arrow file
+ writeInput(testInFile, allocator);
+
+ Integration integration = new Integration();
+
+ // convert it to json
+ String[] args1 = {"-arrow", testInFile.getAbsolutePath(), "-json", testJSONFile
+ .getAbsolutePath(), "-command", Command.ARROW_TO_JSON.name()};
+ integration.run(args1);
+
+ // convert back to arrow
+ String[] args2 = {"-arrow", testOutFile.getAbsolutePath(), "-json", testJSONFile
+ .getAbsolutePath(), "-command", Command.JSON_TO_ARROW.name()};
+ integration.run(args2);
+
+ // check it is the same
+ validateOutput(testOutFile, allocator);
+
+ // validate arrow against json
+ String[] args3 = {"-arrow", testInFile.getAbsolutePath(), "-json", testJSONFile
+ .getAbsolutePath(), "-command", Command.VALIDATE.name()};
+ integration.run(args3);
+ }
+
+ @Test
+ public void testJSONRoundTripWithVariableWidth() throws Exception {
+ File testJSONFile = new File("../../docs/source/format/integration_json_examples/simple.json").getCanonicalFile();
+ if (!testJSONFile.exists()) {
+ testJSONFile = new File("../docs/source/format/integration_json_examples/simple.json");
+ }
+ File testOutFile = testFolder.newFile("testOut.arrow");
+ File testRoundTripJSONFile = testFolder.newFile("testOut.json");
+ testOutFile.delete();
+ testRoundTripJSONFile.delete();
+
+ Integration integration = new Integration();
+
+ // convert to arrow
+ String[] args1 = {"-arrow", testOutFile.getAbsolutePath(), "-json", testJSONFile
+ .getAbsolutePath(), "-command", Command.JSON_TO_ARROW.name()};
+ integration.run(args1);
+
+ // convert back to json
+ String[] args2 = {"-arrow", testOutFile.getAbsolutePath(), "-json", testRoundTripJSONFile
+ .getAbsolutePath(), "-command", Command.ARROW_TO_JSON.name()};
+ integration.run(args2);
+
+ BufferedReader orig = readNormalized(testJSONFile);
+ BufferedReader rt = readNormalized(testRoundTripJSONFile);
+ String i;
+ String o;
+ int j = 0;
+ while ((i = orig.readLine()) != null && (o = rt.readLine()) != null) {
+ assertEquals("line: " + j, i, o);
+ ++j;
+ }
+ }
+
+ @Test
+ public void testJSONRoundTripWithStruct() throws Exception {
+ File testJSONFile = new File("../../docs/source/format/integration_json_examples/struct.json").getCanonicalFile();
+ if (!testJSONFile.exists()) {
+ testJSONFile = new File("../docs/source/format/integration_json_examples/struct.json");
+ }
+ File testOutFile = testFolder.newFile("testOutStruct.arrow");
+ File testRoundTripJSONFile = testFolder.newFile("testOutStruct.json");
+ testOutFile.delete();
+ testRoundTripJSONFile.delete();
+
+ Integration integration = new Integration();
+
+ // convert to arrow
+ String[] args1 = {"-arrow", testOutFile.getAbsolutePath(), "-json", testJSONFile
+ .getAbsolutePath(), "-command", Command.JSON_TO_ARROW.name()};
+ integration.run(args1);
+
+ // convert back to json
+ String[] args2 = {"-arrow", testOutFile.getAbsolutePath(), "-json", testRoundTripJSONFile
+ .getAbsolutePath(), "-command", Command.ARROW_TO_JSON.name()};
+ integration.run(args2);
+
+ BufferedReader orig = readNormalized(testJSONFile);
+ BufferedReader rt = readNormalized(testRoundTripJSONFile);
+ String i;
+ String o;
+ int j = 0;
+ while ((i = orig.readLine()) != null && (o = rt.readLine()) != null) {
+ assertEquals("line: " + j, i, o);
+ ++j;
+ }
+ }
+
+ private BufferedReader readNormalized(File f) throws IOException {
+ Map<?, ?> tree = om.readValue(f.getCanonicalFile(), Map.class);
+ String normalized = om.writeValueAsString(tree);
+ return new BufferedReader(new StringReader(normalized));
+ }
+
+ /**
+ * The test should not be sensitive to small variations in float representation.
+ */
+ @Test
+ public void testFloat() throws Exception {
+ File testValidInFile = testFolder.newFile("testValidFloatIn.arrow");
+ File testInvalidInFile = testFolder.newFile("testAlsoValidFloatIn.arrow");
+ File testJSONFile = testFolder.newFile("testValidOut.json");
+ testJSONFile.delete();
+
+ // generate an arrow file
+ writeInputFloat(testValidInFile, allocator, 912.4140000000002, 912.414);
+ // generate a different arrow file
+ writeInputFloat(testInvalidInFile, allocator, 912.414, 912.4140000000002);
+
+ Integration integration = new Integration();
+
+ // convert the "valid" file to json
+ String[] args1 = {"-arrow", testValidInFile.getAbsolutePath(), "-json", testJSONFile
+ .getAbsolutePath(), "-command", Command.ARROW_TO_JSON.name()};
+ integration.run(args1);
+
+ // compare the "invalid" file to the "valid" json
+ String[] args3 = {"-arrow", testInvalidInFile.getAbsolutePath(), "-json", testJSONFile
+ .getAbsolutePath(), "-command", Command.VALIDATE.name()};
+ // this should fail
+ integration.run(args3);
+ }
+
+ @Test
+ public void testInvalid() throws Exception {
+ File testValidInFile = testFolder.newFile("testValidIn.arrow");
+ File testInvalidInFile = testFolder.newFile("testInvalidIn.arrow");
+ File testJSONFile = testFolder.newFile("testInvalidOut.json");
+ testJSONFile.delete();
+
+ // generate an arrow file
+ writeInput(testValidInFile, allocator);
+ // generate a different arrow file
+ writeInput2(testInvalidInFile, allocator);
+
+ Integration integration = new Integration();
+
+ // convert the "valid" file to json
+ String[] args1 = {"-arrow", testValidInFile.getAbsolutePath(), "-json", testJSONFile
+ .getAbsolutePath(), "-command", Command.ARROW_TO_JSON.name()};
+ integration.run(args1);
+
+ // compare the "invalid" file to the "valid" json
+ String[] args3 = {"-arrow", testInvalidInFile.getAbsolutePath(), "-json", testJSONFile
+ .getAbsolutePath(), "-command", Command.VALIDATE.name()};
+ // this should fail
+ try {
+ integration.run(args3);
+ fail("should have failed");
+ } catch (IllegalArgumentException e) {
+ assertTrue(e.getMessage(), e.getMessage().contains("Different values in column"));
+ assertTrue(e.getMessage(), e.getMessage().contains("999"));
+ }
+
+ }
+}
diff --git a/src/arrow/java/tools/src/test/resources/logback.xml b/src/arrow/java/tools/src/test/resources/logback.xml
new file mode 100644
index 000000000..ff848da2a
--- /dev/null
+++ b/src/arrow/java/tools/src/test/resources/logback.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<configuration>
+ <statusListener class="ch.qos.logback.core.status.NopStatusListener"/>
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <!-- encoders are assigned the type
+ ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+ </encoder>
+ </appender>
+ <logger name="org.apache.arrow" additivity="false">
+ <level value="info" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+</configuration>
diff --git a/src/arrow/java/vector/pom.xml b/src/arrow/java/vector/pom.xml
new file mode 100644
index 000000000..e37e931ef
--- /dev/null
+++ b/src/arrow/java/vector/pom.xml
@@ -0,0 +1,274 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ </parent>
+ <artifactId>arrow-vector</artifactId>
+ <name>Arrow Vectors</name>
+ <description>An off-heap reference implementation for Arrow columnar data format.</description>
+
+ <dependencies>
+
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-format</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>1.10</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-unsafe</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.flatbuffers</groupId>
+ <artifactId>flatbuffers-java</artifactId>
+ <version>${dep.fbs.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ </dependencies>
+
+ <pluginRepositories>
+ <pluginRepository>
+ <id>apache</id>
+ <name>apache</name>
+ <url>https://repo.maven.apache.org/maven2/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </pluginRepository>
+ </pluginRepositories>
+
+ <build>
+
+ <resources>
+ <resource>
+ <!-- Copy freemarker template and fmpp configuration files of Vector's
+ to allow clients to leverage definitions. -->
+ <directory>${basedir}/src/main/codegen</directory>
+ <targetPath>codegen</targetPath>
+ </resource>
+ </resources>
+
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>default-test</id>
+ <phase>test</phase>
+ <configuration>
+ <classpathDependencyExcludes>
+ <classpathDependencyExclude>org.apache.arrow:arrow-memory-unsafe</classpathDependencyExclude>
+ </classpathDependencyExcludes>
+ </configuration>
+ </execution>
+ <execution>
+ <id>run-unsafe</id>
+ <phase>test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <classpathDependencyExcludes>
+ <classpathDependencyExclude>org.apache.arrow:arrow-memory-netty</classpathDependencyExclude>
+ </classpathDependencyExcludes>
+ <reportNameSuffix>netty</reportNameSuffix>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <executions>
+ <execution> <!-- copy all templates in the same location to compile them at once -->
+ <id>copy-fmpp-resources</id>
+ <phase>initialize</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${project.build.directory}/codegen</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/codegen</directory>
+ <filtering>false</filtering>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin> <!-- generate sources from fmpp -->
+ <groupId>org.apache.drill.tools</groupId>
+ <artifactId>drill-fmpp-maven-plugin</artifactId>
+ <version>1.5.0</version>
+ <executions>
+ <execution>
+ <id>generate-fmpp</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>generate</goal>
+ </goals>
+ <configuration>
+ <config>src/main/codegen/config.fmpp</config>
+ <output>${project.build.directory}/generated-sources</output>
+ <templates>${project.build.directory}/codegen/templates</templates>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>3.1.1</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <artifactSet>
+ <includes>
+ <include>org.apache.arrow:arrow-format</include>
+ <include>com.google.flatbuffers:*</include>
+ </includes>
+ </artifactSet>
+ <shadedArtifactAttached>true</shadedArtifactAttached>
+ <shadedClassifierName>shade-format-flatbuffers</shadedClassifierName>
+ <keepDependenciesWithProvidedScope>true</keepDependenciesWithProvidedScope>
+ <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+ <relocations>
+ <relocation>
+ <pattern>com.google.flatbuffers</pattern>
+ <shadedPattern>arrow.vector.com.google.flatbuffers</shadedPattern>
+ </relocation>
+ </relocations>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ <pluginManagement>
+ <plugins>
+ <!--This plugin's configuration is used to store Eclipse m2e settings
+ only. It has no influence on the Maven build itself. -->
+ <plugin>
+ <groupId>org.eclipse.m2e</groupId>
+ <artifactId>lifecycle-mapping</artifactId>
+ <version>1.0.0</version>
+ <configuration>
+ <lifecycleMappingMetadata>
+ <pluginExecutions>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.drill.tools</groupId>
+ <artifactId>drill-fmpp-maven-plugin</artifactId>
+ <versionRange>[1.0,)</versionRange>
+ <goals>
+ <goal>generate</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <execute>
+ <runOnIncremental>false</runOnIncremental>
+ <runOnConfiguration>true</runOnConfiguration>
+ </execute>
+ </action>
+ </pluginExecution>
+ </pluginExecutions>
+ </lifecycleMappingMetadata>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
+
+ </build>
+
+
+ <profiles>
+ <profile>
+ <!-- This profile turns on integration testing. It activates the failsafe plugin and will run any tests
+ with the 'IT' prefix. This should be run in a separate CI build or on developers machines as it potentially
+ uses quite a bit of memory. Activate the tests by adding -Pintegration-tests to your maven command line -->
+ <id>integration-tests</id>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <configuration>
+ <forkedProcessTimeoutInSeconds>3600</forkedProcessTimeoutInSeconds>
+ <systemPropertyVariables>
+ <arrow.memory.debug.allocator>false</arrow.memory.debug.allocator>
+ </systemPropertyVariables>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+
+</project>
diff --git a/src/arrow/java/vector/src/main/codegen/config.fmpp b/src/arrow/java/vector/src/main/codegen/config.fmpp
new file mode 100644
index 000000000..ef5a5072a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/config.fmpp
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+data: {
+ # TODO: Rename to ~valueVectorModesAndTypes for clarity.
+ vv: tdd(../data/ValueVectorTypes.tdd),
+ arrowTypes: tdd(../data/ArrowTypes.tdd)
+
+}
+freemarkerLinks: {
+ includes: includes/
+}
diff --git a/src/arrow/java/vector/src/main/codegen/data/ArrowTypes.tdd b/src/arrow/java/vector/src/main/codegen/data/ArrowTypes.tdd
new file mode 100644
index 000000000..3cf9a9687
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/data/ArrowTypes.tdd
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{
+ types: [
+ {
+ name: "Null",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "Struct_",
+ fields: [],
+ complex: true
+ },
+ {
+ name: "List",
+ fields: [],
+ complex: true
+ },
+ {
+ name: "LargeList",
+ fields: [],
+ complex: true
+ },
+ {
+ name: "FixedSizeList",
+ fields: [{name: "listSize", type: int}],
+ complex: true
+ },
+ {
+ name: "Union",
+ fields: [{name: "mode", type: short, valueType: UnionMode}, {name: "typeIds", type: "int[]"}],
+ complex: true
+ },
+ {
+ name: "Map",
+ fields: [{name: "keysSorted", type: boolean}],
+ complex: true
+ },
+ {
+ name: "Int",
+ fields: [{name: "bitWidth", type: int}, {name: "isSigned", type: boolean}],
+ complex: false
+ },
+ {
+ name: "FloatingPoint",
+ fields: [{name: precision, type: short, valueType: FloatingPointPrecision}],
+ complex: false
+ },
+ {
+ name: "Utf8",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "LargeUtf8",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "Binary",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "LargeBinary",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "FixedSizeBinary",
+ fields: [{name: "byteWidth", type: int}],
+ complex: false
+ }
+ {
+ name: "Bool",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "Decimal",
+ fields: [{name: "precision", type: int}, {name: "scale", type: int}, {name: "bitWidth", type: int}],
+ complex: false
+ },
+ {
+ name: "Date",
+ fields: [{name: "unit", type: short, valueType: DateUnit}]
+ complex: false
+ },
+ {
+ name: "Time",
+ fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "bitWidth", type: int}],
+ complex: false
+ },
+ {
+ name: "Timestamp",
+ fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "timezone", type: String}]
+ complex: false
+ },
+ {
+ name: "Interval",
+ fields: [{name: "unit", type: short, valueType: IntervalUnit}],
+ complex: false
+ },
+ {
+ name: "Duration",
+ fields: [{name: "unit", type: short, valueType: TimeUnit}],
+ complex: false
+ }
+ ]
+}
diff --git a/src/arrow/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/src/arrow/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
new file mode 100644
index 000000000..2a9218042
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
@@ -0,0 +1,206 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{
+ modes: [
+ {name: "Optional", prefix: "Nullable"},
+ {name: "Required", prefix: ""}
+ ],
+ types: [
+ {
+ major: "Fixed",
+ width: 1,
+ javaType: "byte",
+ boxedType: "Byte",
+ fields: [{name: "value", type: "byte"}],
+ minor: [
+ { class: "TinyInt", valueHolder: "IntHolder" },
+ { class: "UInt1", valueHolder: "UInt1Holder" }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 2,
+ javaType: "char",
+ boxedType: "Character",
+ fields: [{name: "value", type: "char"}],
+ minor: [
+ { class: "UInt2", valueHolder: "UInt2Holder"}
+ ]
+ }, {
+ major: "Fixed",
+ width: 2,
+ javaType: "short",
+ boxedType: "Short",
+ fields: [{name: "value", type: "short"}],
+ minor: [
+ { class: "SmallInt", valueHolder: "Int2Holder"},
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 4,
+ javaType: "int",
+ boxedType: "Integer",
+ fields: [{name: "value", type: "int"}],
+ minor: [
+ { class: "Int", valueHolder: "IntHolder"},
+ { class: "UInt4", valueHolder: "UInt4Holder" },
+ { class: "Float4", javaType: "float" , boxedType: "Float", fields: [{name: "value", type: "float"}]},
+ { class: "DateDay" },
+ { class: "IntervalYear", javaType: "int", friendlyType: "Period" },
+ { class: "TimeSec" },
+ { class: "TimeMilli", javaType: "int", friendlyType: "LocalDateTime" }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 8,
+ javaType: "long",
+ boxedType: "Long",
+ fields: [{name: "value", type: "long"}],
+ minor: [
+ { class: "BigInt"},
+ { class: "UInt8" },
+ { class: "Float8", javaType: "double", boxedType: "Double", fields: [{name: "value", type: "double"}] },
+ { class: "DateMilli", javaType: "long", friendlyType: "LocalDateTime" },
+ { class: "Duration", javaType: "long", friendlyType: "Duration",
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Duration",
+ typeParams: [ {name: "unit", type: "org.apache.arrow.vector.types.TimeUnit"} ],
+ arrowTypeConstructorParams: ["unit"]}
+ { class: "TimeStampSec", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" },
+ { class: "TimeStampMilli", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" },
+ { class: "TimeStampMicro", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" },
+ { class: "TimeStampNano", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" },
+ { class: "TimeStampSecTZ", javaType: "long", boxedType: "Long",
+ typeParams: [ {name: "timezone", type: "String"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp",
+ arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.SECOND", "timezone"] },
+ { class: "TimeStampMilliTZ", javaType: "long", boxedType: "Long",
+ typeParams: [ {name: "timezone", type: "String"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp",
+ arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.MILLISECOND", "timezone"] },
+ { class: "TimeStampMicroTZ", javaType: "long", boxedType: "Long",
+ typeParams: [ {name: "timezone", type: "String"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp",
+ arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.MICROSECOND", "timezone"] },
+ { class: "TimeStampNanoTZ", javaType: "long", boxedType: "Long",
+ typeParams: [ {name: "timezone", type: "String"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp",
+ arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.NANOSECOND", "timezone"] },
+ { class: "TimeMicro" },
+ { class: "TimeNano" }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 8,
+ javaType: "ArrowBuf",
+ boxedType: "ArrowBuf",
+ minor: [
+ { class: "IntervalDay", millisecondsOffset: 4, friendlyType: "Duration", fields: [ {name: "days", type:"int"}, {name: "milliseconds", type:"int"}] }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 16,
+ javaType: "ArrowBuf",
+ boxedType: "ArrowBuf",
+ minor: [
+ { class: "IntervalMonthDayNano", daysOffset: 4, nanosecondsOffset: 8, friendlyType: "PeriodDuration", fields: [ {name: "months", type:"int"}, {name: "days", type:"int"}, {name: "nanoseconds", type:"long"}] }
+ ]
+ },
+
+ {
+ major: "Fixed",
+ width: 32,
+ javaType: "ArrowBuf",
+ boxedType: "ArrowBuf",
+
+ minor: [
+ {
+ class: "Decimal256",
+ maxPrecisionDigits: 76, nDecimalDigits: 4, friendlyType: "BigDecimal",
+ typeParams: [ {name: "scale", type: "int"}, { name: "precision", type: "int"}],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Decimal",
+ fields: [{name: "start", type: "long"}, {name: "buffer", type: "ArrowBuf"}]
+ }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 16,
+ javaType: "ArrowBuf",
+ boxedType: "ArrowBuf",
+
+ minor: [
+ {
+ class: "Decimal",
+ maxPrecisionDigits: 38, nDecimalDigits: 4, friendlyType: "BigDecimal",
+ typeParams: [ {name: "scale", type: "int"}, { name: "precision", type: "int"}],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Decimal",
+ fields: [{name: "start", type: "long"}, {name: "buffer", type: "ArrowBuf"}]
+ }
+ ]
+ },
+
+ {
+ major: "Fixed",
+ width: -1,
+ javaType: "byte[]",
+ boxedType: "ArrowBuf",
+ minor: [
+ {
+ class: "FixedSizeBinary",
+ typeParams: [ {name: "byteWidth", type: "int"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary",
+ friendlyType: "byte[]",
+ fields: [{name: "buffer", type: "ArrowBuf"}],
+ }
+ ]
+ },
+ {
+ major: "VarLen",
+ width: 4,
+ javaType: "int",
+ boxedType: "ArrowBuf",
+ fields: [{name: "start", type: "int"}, {name: "end", type: "int"}, {name: "buffer", type: "ArrowBuf"}],
+ minor: [
+ { class: "VarBinary" , friendlyType: "byte[]" },
+ { class: "VarChar" , friendlyType: "Text" }
+ ]
+ },
+ {
+ major: "VarLen",
+ width: 8,
+ javaType: "long",
+ boxedType: "ArrowBuf",
+ fields: [{name: "start", type: "long"}, {name: "end", type: "long"}, {name: "buffer", type: "ArrowBuf"}],
+ minor: [
+ { class: "LargeVarChar" , friendlyType: "Text" }
+ { class: "LargeVarBinary" , friendlyType: "byte[]" }
+ ]
+ },
+ {
+ major: "Bit",
+ width: 1,
+ javaType: "int",
+ boxedType: "Integer",
+ minor: [
+ { class: "Bit" , friendlyType: "Boolean", fields: [{name: "value", type: "int"}] }
+ ]
+ }
+ ]
+}
diff --git a/src/arrow/java/vector/src/main/codegen/includes/license.ftl b/src/arrow/java/vector/src/main/codegen/includes/license.ftl
new file mode 100644
index 000000000..c6a5afeef
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/includes/license.ftl
@@ -0,0 +1,16 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ \ No newline at end of file
diff --git a/src/arrow/java/vector/src/main/codegen/includes/vv_imports.ftl b/src/arrow/java/vector/src/main/codegen/includes/vv_imports.ftl
new file mode 100644
index 000000000..c9a8820b2
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/includes/vv_imports.ftl
@@ -0,0 +1,61 @@
+<#--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+import static org.apache.arrow.util.Preconditions.checkArgument;
+import static org.apache.arrow.util.Preconditions.checkState;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+import org.apache.arrow.memory.*;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.*;
+import org.apache.arrow.vector.types.pojo.*;
+import org.apache.arrow.vector.types.pojo.ArrowType.*;
+import org.apache.arrow.vector.types.*;
+import org.apache.arrow.vector.*;
+import org.apache.arrow.vector.holders.*;
+import org.apache.arrow.vector.util.*;
+import org.apache.arrow.vector.complex.*;
+import org.apache.arrow.vector.complex.reader.*;
+import org.apache.arrow.vector.complex.impl.*;
+import org.apache.arrow.vector.complex.writer.*;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+
+import java.util.Arrays;
+import java.util.Random;
+import java.util.List;
+
+import java.io.Closeable;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
+
+import java.sql.Date;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.time.Period;
+import java.time.ZonedDateTime;
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldReader.java b/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldReader.java
new file mode 100644
index 000000000..e3c872946
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldReader.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractFieldReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+abstract class AbstractFieldReader extends AbstractBaseReader implements FieldReader{
+
+ AbstractFieldReader(){
+ super();
+ }
+
+ /**
+ * Returns true if the current value of the reader is not null
+ * @return whether the current value is set
+ */
+ public boolean isSet() {
+ return true;
+ }
+
+ @Override
+ public Field getField() {
+ fail("getField");
+ return null;
+ }
+
+ <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean",
+ "LocalDateTime", "Duration", "Period", "Double", "Float",
+ "Character", "Text", "String", "Byte", "byte[]", "PeriodDuration"] as friendlyType>
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+ public ${friendlyType} read${safeType}(int arrayIndex) {
+ fail("read${safeType}(int arrayIndex)");
+ return null;
+ }
+
+ public ${friendlyType} read${safeType}() {
+ fail("read${safeType}()");
+ return null;
+ }
+
+ </#list>
+ public void copyAsValue(StructWriter writer) {
+ fail("CopyAsValue StructWriter");
+ }
+
+ public void copyAsField(String name, StructWriter writer) {
+ fail("CopyAsField StructWriter");
+ }
+
+ public void copyAsField(String name, ListWriter writer) {
+ fail("CopyAsFieldList");
+ }
+
+ public void copyAsField(String name, MapWriter writer) {
+ fail("CopyAsFieldMap");
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign boxedType = (minor.boxedType!type.boxedType) />
+ public void read(${name}Holder holder) {
+ fail("${name}");
+ }
+
+ public void read(Nullable${name}Holder holder) {
+ fail("${name}");
+ }
+
+ public void read(int arrayIndex, ${name}Holder holder) {
+ fail("Repeated${name}");
+ }
+
+ public void read(int arrayIndex, Nullable${name}Holder holder) {
+ fail("Repeated${name}");
+ }
+
+ public void copyAsValue(${name}Writer writer) {
+ fail("CopyAsValue${name}");
+ }
+
+ public void copyAsField(String name, ${name}Writer writer) {
+ fail("CopyAsField${name}");
+ }
+
+ </#list></#list>
+ public FieldReader reader(String name) {
+ fail("reader(String name)");
+ return null;
+ }
+
+ public FieldReader reader() {
+ fail("reader()");
+ return null;
+ }
+
+ public int size() {
+ fail("size()");
+ return -1;
+ }
+
+ private void fail(String name) {
+ throw new IllegalArgumentException(String.format("You tried to read a [%s] type when you are using a field reader of type [%s].", name, this.getClass().getSimpleName()));
+ }
+}
+
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldWriter.java
new file mode 100644
index 000000000..1f80f2526
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldWriter.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractFieldWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+abstract class AbstractFieldWriter extends AbstractBaseWriter implements FieldWriter {
+
+ protected boolean addVectorAsNullable = true;
+
+ /**
+ * Set flag to control the FieldType.nullable property when a writer creates a new vector.
+ * If true then vectors created will be nullable, this is the default behavior. If false then
+ * vectors created will be non-nullable.
+ *
+ * @param nullable Whether or not to create nullable vectors (default behavior is true)
+ */
+ public void setAddVectorAsNullable(boolean nullable) {
+ addVectorAsNullable = nullable;
+ }
+
+ @Override
+ public void start() {
+ throw new IllegalStateException(String.format("You tried to start when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void end() {
+ throw new IllegalStateException(String.format("You tried to end when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void startList() {
+ throw new IllegalStateException(String.format("You tried to start a list when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void endList() {
+ throw new IllegalStateException(String.format("You tried to end a list when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void startMap() {
+ throw new IllegalStateException(String.format("You tried to start a map when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void endMap() {
+ throw new IllegalStateException(String.format("You tried to end a map when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void startEntry() {
+ throw new IllegalStateException(String.format("You tried to start a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public MapWriter key() {
+ throw new IllegalStateException(String.format("You tried to start a map key when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public MapWriter value() {
+ throw new IllegalStateException(String.format("You tried to start a map value when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void endEntry() {
+ throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+ @Override
+ public void write(${name}Holder holder) {
+ fail("${name}");
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ fail("${name}");
+ }
+
+ <#if minor.class?starts_with("Decimal")>
+ public void write${minor.class}(${friendlyType} value) {
+ fail("${name}");
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>, ArrowType arrowType) {
+ fail("${name}");
+ }
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value) {
+ fail("${name}");
+ }
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType) {
+ fail("${name}");
+ }
+ </#if>
+
+ </#list></#list>
+
+ public void writeNull() {
+ fail("${name}");
+ }
+
+ /**
+ * This implementation returns {@code false}.
+ * <p>
+ * Must be overridden by struct writers.
+ * </p>
+ */
+ @Override
+ public boolean isEmptyStruct() {
+ return false;
+ }
+
+ @Override
+ public StructWriter struct() {
+ fail("Struct");
+ return null;
+ }
+
+ @Override
+ public ListWriter list() {
+ fail("List");
+ return null;
+ }
+
+ @Override
+ public MapWriter map() {
+ fail("Map");
+ return null;
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ fail("Struct");
+ return null;
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ fail("List");
+ return null;
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ fail("Map");
+ return null;
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ fail("Map");
+ return null;
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ fail("Map");
+ return null;
+ }
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#if minor.typeParams?? >
+
+ @Override
+ public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ fail("${capName}(" + <#list minor.typeParams as typeParam>"${typeParam.name}: " + ${typeParam.name} + ", " + </#list>")");
+ return null;
+ }
+ </#if>
+
+ @Override
+ public ${capName}Writer ${lowerName}(String name) {
+ fail("${capName}");
+ return null;
+ }
+
+ @Override
+ public ${capName}Writer ${lowerName}() {
+ fail("${capName}");
+ return null;
+ }
+
+ </#list></#list>
+
+ public void copyReader(FieldReader reader) {
+ fail("Copy FieldReader");
+ }
+
+ public void copyReaderToField(String name, FieldReader reader) {
+ fail("Copy FieldReader to STring");
+ }
+
+ private void fail(String name) {
+ throw new IllegalArgumentException(String.format("You tried to write a %s type when you are using a ValueWriter of type %s.", name, this.getClass().getSimpleName()));
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/src/arrow/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
new file mode 100644
index 000000000..264e85021
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractPromotableFieldWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * A FieldWriter which delegates calls to another FieldWriter. The delegate FieldWriter can be promoted to a new type
+ * when necessary. Classes that extend this class are responsible for handling promotion.
+ *
+ * This class is generated using freemarker and the ${.template_name} template.
+ *
+ */
+@SuppressWarnings("unused")
+abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter {
+ /**
+ * Retrieve the FieldWriter, promoting if it is not a FieldWriter of the specified type
+ * @param type the type of the values we want to write
+ * @return the corresponding field writer
+ */
+ protected FieldWriter getWriter(MinorType type) {
+ return getWriter(type, null);
+ }
+
+ abstract protected FieldWriter getWriter(MinorType type, ArrowType arrowType);
+
+ /**
+ * @return the current FieldWriter
+ */
+ abstract protected FieldWriter getWriter();
+
+ @Override
+ public void start() {
+ getWriter(MinorType.STRUCT).start();
+ }
+
+ @Override
+ public void end() {
+ getWriter(MinorType.STRUCT).end();
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void startList() {
+ getWriter(MinorType.LIST).startList();
+ }
+
+ @Override
+ public void endList() {
+ getWriter(MinorType.LIST).endList();
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void startMap() {
+ getWriter(MinorType.MAP).startMap();
+ }
+
+ @Override
+ public void endMap() {
+ getWriter(MinorType.MAP).endMap();
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void startEntry() {
+ getWriter(MinorType.MAP).startEntry();
+ }
+
+ @Override
+ public MapWriter key() {
+ return getWriter(MinorType.MAP).key();
+ }
+
+ @Override
+ public MapWriter value() {
+ return getWriter(MinorType.MAP).value();
+ }
+
+ @Override
+ public void endEntry() {
+ getWriter(MinorType.MAP).endEntry();
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#if minor.class != "Decimal" && minor.class != "Decimal256">
+ @Override
+ public void write(${name}Holder holder) {
+ getWriter(MinorType.${name?upper_case}).write(holder);
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ }
+
+ <#elseif minor.class == "Decimal">
+ @Override
+ public void write(DecimalHolder holder) {
+ getWriter(MinorType.DECIMAL).write(holder);
+ }
+
+ public void writeDecimal(int start, ArrowBuf buffer, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL).writeDecimal(start, buffer, arrowType);
+ }
+
+ public void writeDecimal(int start, ArrowBuf buffer) {
+ getWriter(MinorType.DECIMAL).writeDecimal(start, buffer);
+ }
+
+ public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL).writeBigEndianBytesToDecimal(value, arrowType);
+ }
+
+ public void writeBigEndianBytesToDecimal(byte[] value) {
+ getWriter(MinorType.DECIMAL).writeBigEndianBytesToDecimal(value);
+ }
+ <#elseif minor.class == "Decimal256">
+ @Override
+ public void write(Decimal256Holder holder) {
+ getWriter(MinorType.DECIMAL256).write(holder);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL256).writeDecimal256(start, buffer, arrowType);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer) {
+ getWriter(MinorType.DECIMAL256).writeDecimal256(start, buffer);
+ }
+ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL256).writeBigEndianBytesToDecimal256(value, arrowType);
+ }
+
+ public void writeBigEndianBytesToDecimal256(byte[] value) {
+ getWriter(MinorType.DECIMAL256).writeBigEndianBytesToDecimal256(value);
+ }
+
+
+ </#if>
+
+ </#list></#list>
+ public void writeNull() {
+ }
+
+ @Override
+ public StructWriter struct() {
+ return getWriter(MinorType.LIST).struct();
+ }
+
+ @Override
+ public ListWriter list() {
+ return getWriter(MinorType.LIST).list();
+ }
+
+ @Override
+ public MapWriter map() {
+ return getWriter(MinorType.LIST).map();
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ return getWriter(MinorType.MAP, new ArrowType.Map(keysSorted));
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ return getWriter(MinorType.STRUCT).struct(name);
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ return getWriter(MinorType.STRUCT).list(name);
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ return getWriter(MinorType.STRUCT).map(name);
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ return getWriter(MinorType.STRUCT).map(name, keysSorted);
+ }
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+
+ <#if minor.typeParams?? >
+ @Override
+ public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ return getWriter(MinorType.STRUCT).${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}</#list>);
+ }
+
+ </#if>
+ @Override
+ public ${capName}Writer ${lowerName}(String name) {
+ return getWriter(MinorType.STRUCT).${lowerName}(name);
+ }
+
+ @Override
+ public ${capName}Writer ${lowerName}() {
+ return getWriter(MinorType.LIST).${lowerName}();
+ }
+
+ </#list></#list>
+
+ public void copyReader(FieldReader reader) {
+ getWriter().copyReader(reader);
+ }
+
+ public void copyReaderToField(String name, FieldReader reader) {
+ getWriter().copyReaderToField(name, reader);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ArrowType.java b/src/arrow/java/vector/src/main/codegen/templates/ArrowType.java
new file mode 100644
index 000000000..b08d4ad0a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ArrowType.java
@@ -0,0 +1,375 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/types/pojo/ArrowType.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.types.pojo;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+import java.util.Objects;
+
+import org.apache.arrow.flatbuf.Type;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.*;
+import org.apache.arrow.vector.FieldVector;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+
+/**
+ * Arrow types
+ * Source code generated using FreeMarker template ${.template_name}
+ **/
+@JsonTypeInfo(
+ use = JsonTypeInfo.Id.NAME,
+ include = JsonTypeInfo.As.PROPERTY,
+ property = "name")
+@JsonSubTypes({
+<#list arrowTypes.types as type>
+ @JsonSubTypes.Type(value = ArrowType.${type.name?remove_ending("_")}.class, name = "${type.name?remove_ending("_")?lower_case}"),
+</#list>
+})
+public abstract class ArrowType {
+
+ public static abstract class PrimitiveType extends ArrowType {
+
+ private PrimitiveType() {
+ }
+
+ @Override
+ public boolean isComplex() {
+ return false;
+ }
+ }
+
+ public static abstract class ComplexType extends ArrowType {
+
+ private ComplexType() {
+ }
+
+ @Override
+ public boolean isComplex() {
+ return true;
+ }
+ }
+
+ public static enum ArrowTypeID {
+ <#list arrowTypes.types as type>
+ <#assign name = type.name>
+ ${name?remove_ending("_")}(Type.${name}),
+ </#list>
+ NONE(Type.NONE);
+
+ private final byte flatbufType;
+
+ public byte getFlatbufID() {
+ return this.flatbufType;
+ }
+
+ private ArrowTypeID(byte flatbufType) {
+ this.flatbufType = flatbufType;
+ }
+ }
+
+ @JsonIgnore
+ public abstract ArrowTypeID getTypeID();
+ @JsonIgnore
+ public abstract boolean isComplex();
+ public abstract int getType(FlatBufferBuilder builder);
+ public abstract <T> T accept(ArrowTypeVisitor<T> visitor);
+
+ /**
+ * to visit the ArrowTypes
+ * <code>
+ * type.accept(new ArrowTypeVisitor&lt;Type&gt;() {
+ * ...
+ * });
+ * </code>
+ */
+ public static interface ArrowTypeVisitor<T> {
+ <#list arrowTypes.types as type>
+ T visit(${type.name?remove_ending("_")} type);
+ </#list>
+ default T visit(ExtensionType type) {
+ return type.storageType().accept(this);
+ }
+ }
+
+ /**
+ * to visit the Complex ArrowTypes and bundle Primitive ones in one case
+ */
+ public static abstract class ComplexTypeVisitor<T> implements ArrowTypeVisitor<T> {
+
+ public T visit(PrimitiveType type) {
+ throw new UnsupportedOperationException("Unexpected Primitive type: " + type);
+ }
+
+ <#list arrowTypes.types as type>
+ <#if !type.complex>
+ public final T visit(${type.name?remove_ending("_")} type) {
+ return visit((PrimitiveType) type);
+ }
+ </#if>
+ </#list>
+ }
+
+ /**
+ * to visit the Primitive ArrowTypes and bundle Complex ones under one case
+ */
+ public static abstract class PrimitiveTypeVisitor<T> implements ArrowTypeVisitor<T> {
+
+ public T visit(ComplexType type) {
+ throw new UnsupportedOperationException("Unexpected Complex type: " + type);
+ }
+
+ <#list arrowTypes.types as type>
+ <#if type.complex>
+ public final T visit(${type.name?remove_ending("_")} type) {
+ return visit((ComplexType) type);
+ }
+ </#if>
+ </#list>
+ }
+
+ <#list arrowTypes.types as type>
+ <#assign name = type.name?remove_ending("_")>
+ <#assign fields = type.fields>
+ public static class ${name} extends <#if type.complex>ComplexType<#else>PrimitiveType</#if> {
+ public static final ArrowTypeID TYPE_TYPE = ArrowTypeID.${name};
+ <#if type.fields?size == 0>
+ public static final ${name} INSTANCE = new ${name}();
+ <#else>
+
+ <#list fields as field>
+ <#assign fieldType = field.valueType!field.type>
+ ${fieldType} ${field.name};
+ </#list>
+
+
+ <#if type.name == "Decimal">
+ // Needed to support golden file integration tests.
+ @JsonCreator
+ public static Decimal createDecimal(
+ @JsonProperty("precision") int precision,
+ @JsonProperty("scale") int scale,
+ @JsonProperty("bitWidth") Integer bitWidth) {
+
+ return new Decimal(precision, scale, bitWidth == null ? 128 : bitWidth);
+ }
+
+ /**
+ * Construct Decimal with 128 bits.
+ *
+ * This is kept mainly for the sake of backward compatibility.
+ * Please use {@link org.apache.arrow.vector.types.pojo.ArrowType.Decimal#Decimal(int, int, int)} instead.
+ *
+ * @deprecated This API will be removed in a future release.
+ */
+ @Deprecated
+ public Decimal(int precision, int scale) {
+ this(precision, scale, 128);
+ }
+
+ <#else>
+ @JsonCreator
+ </#if>
+ public ${type.name}(
+ <#list type.fields as field>
+ <#assign fieldType = field.valueType!field.type>
+ @JsonProperty("${field.name}") ${fieldType} ${field.name}<#if field_has_next>, </#if>
+ </#list>
+ ) {
+ <#list type.fields as field>
+ this.${field.name} = ${field.name};
+ </#list>
+ }
+
+ <#list fields as field>
+ <#assign fieldType = field.valueType!field.type>
+ public ${fieldType} get${field.name?cap_first}() {
+ return ${field.name};
+ }
+ </#list>
+ </#if>
+
+ @Override
+ public ArrowTypeID getTypeID() {
+ return TYPE_TYPE;
+ }
+
+ @Override
+ public int getType(FlatBufferBuilder builder) {
+ <#list type.fields as field>
+ <#if field.type == "String">
+ int ${field.name} = this.${field.name} == null ? -1 : builder.createString(this.${field.name});
+ </#if>
+ <#if field.type == "int[]">
+ int ${field.name} = this.${field.name} == null ? -1 : org.apache.arrow.flatbuf.${type.name}.create${field.name?cap_first}Vector(builder, this.${field.name});
+ </#if>
+ </#list>
+ org.apache.arrow.flatbuf.${type.name}.start${type.name}(builder);
+ <#list type.fields as field>
+ <#if field.type == "String" || field.type == "int[]">
+ if (this.${field.name} != null) {
+ org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, ${field.name});
+ }
+ <#else>
+ org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, this.${field.name}<#if field.valueType??>.getFlatbufID()</#if>);
+ </#if>
+ </#list>
+ return org.apache.arrow.flatbuf.${type.name}.end${type.name}(builder);
+ }
+
+ public String toString() {
+ return "${name}"
+ <#if fields?size != 0>
+ + "("
+ <#list fields as field>
+ + <#if field.type == "int[]">java.util.Arrays.toString(${field.name})<#else>${field.name}</#if><#if field_has_next> + ", " </#if>
+ </#list>
+ + ")"
+ </#if>
+ ;
+ }
+
+ @Override
+ public int hashCode() {
+ return java.util.Arrays.deepHashCode(new Object[] {<#list type.fields as field>${field.name}<#if field_has_next>, </#if></#list>});
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof ${name})) {
+ return false;
+ }
+ <#if type.fields?size == 0>
+ return true;
+ <#else>
+ ${type.name} that = (${type.name}) obj;
+ return <#list type.fields as field>Objects.deepEquals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>;</#if>
+ </#list>
+ </#if>
+ }
+
+ @Override
+ public <T> T accept(ArrowTypeVisitor<T> visitor) {
+ return visitor.visit(this);
+ }
+ }
+ </#list>
+
+ /**
+ * A user-defined data type that wraps an underlying storage type.
+ */
+ public abstract static class ExtensionType extends ComplexType {
+ /** The on-wire type for this user-defined type. */
+ public abstract ArrowType storageType();
+ /** The name of this user-defined type. Used to identify the type during serialization. */
+ public abstract String extensionName();
+ /** Check equality of this type to another user-defined type. */
+ public abstract boolean extensionEquals(ExtensionType other);
+ /** Save any metadata for this type. */
+ public abstract String serialize();
+ /** Given saved metadata and the underlying storage type, construct a new instance of the user type. */
+ public abstract ArrowType deserialize(ArrowType storageType, String serializedData);
+ /** Construct a vector for the user type. */
+ public abstract FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator);
+
+ /** The field metadata key storing the name of the extension type. */
+ public static final String EXTENSION_METADATA_KEY_NAME = "ARROW:extension:name";
+ /** The field metadata key storing metadata for the extension type. */
+ public static final String EXTENSION_METADATA_KEY_METADATA = "ARROW:extension:metadata";
+
+ @Override
+ public ArrowTypeID getTypeID() {
+ return storageType().getTypeID();
+ }
+
+ @Override
+ public int getType(FlatBufferBuilder builder) {
+ return storageType().getType(builder);
+ }
+
+ public String toString() {
+ return "ExtensionType(" + extensionName() + ", " + storageType().toString() + ")";
+ }
+
+ @Override
+ public int hashCode() {
+ return java.util.Arrays.deepHashCode(new Object[] {storageType(), extensionName()});
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof ExtensionType)) {
+ return false;
+ }
+ return this.extensionEquals((ExtensionType) obj);
+ }
+
+ @Override
+ public <T> T accept(ArrowTypeVisitor<T> visitor) {
+ return visitor.visit(this);
+ }
+ }
+
+ private static final int defaultDecimalBitWidth = 128;
+
+ public static org.apache.arrow.vector.types.pojo.ArrowType getTypeForField(org.apache.arrow.flatbuf.Field field) {
+ switch(field.typeType()) {
+ <#list arrowTypes.types as type>
+ <#assign name = type.name?remove_ending("_")>
+ <#assign nameLower = type.name?lower_case>
+ <#assign fields = type.fields>
+ case Type.${type.name}: {
+ org.apache.arrow.flatbuf.${type.name} ${nameLower}Type = (org.apache.arrow.flatbuf.${type.name}) field.type(new org.apache.arrow.flatbuf.${type.name}());
+ <#list type.fields as field>
+ <#if field.type == "int[]">
+ ${field.type} ${field.name} = new int[${nameLower}Type.${field.name}Length()];
+ for (int i = 0; i< ${field.name}.length; ++i) {
+ ${field.name}[i] = ${nameLower}Type.${field.name}(i);
+ }
+ <#else>
+ ${field.type} ${field.name} = ${nameLower}Type.${field.name}();
+ </#if>
+ </#list>
+ <#if type.name == "Decimal">
+ if (bitWidth != defaultDecimalBitWidth && bitWidth != 256) {
+ throw new IllegalArgumentException("Library only supports 128-bit and 256-bit decimal values");
+ }
+ </#if>
+ return new ${name}(<#list type.fields as field><#if field.valueType??>${field.valueType}.fromFlatbufID(${field.name})<#else>${field.name}</#if><#if field_has_next>, </#if></#list>);
+ }
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unsupported type: " + field.typeType());
+ }
+ }
+
+ public static Int getInt(org.apache.arrow.flatbuf.Field field) {
+ org.apache.arrow.flatbuf.Int intType = (org.apache.arrow.flatbuf.Int) field.type(new org.apache.arrow.flatbuf.Int());
+ return new Int(intType.bitWidth(), intType.isSigned());
+ }
+}
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/BaseReader.java b/src/arrow/java/vector/src/main/codegen/templates/BaseReader.java
new file mode 100644
index 000000000..85d582a53
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/BaseReader.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/reader/BaseReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.reader;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public interface BaseReader extends Positionable{
+ Field getField();
+ MinorType getMinorType();
+ void reset();
+ void read(UnionHolder holder);
+ void read(int index, UnionHolder holder);
+ void copyAsValue(UnionWriter writer);
+ void read(DenseUnionHolder holder);
+ void read(int index, DenseUnionHolder holder);
+ void copyAsValue(DenseUnionWriter writer);
+ boolean isSet();
+
+ public interface StructReader extends BaseReader, Iterable<String>{
+ FieldReader reader(String name);
+ }
+
+ public interface RepeatedStructReader extends StructReader{
+ boolean next();
+ int size();
+ void copyAsValue(StructWriter writer);
+ }
+
+ public interface ListReader extends BaseReader{
+ FieldReader reader();
+ }
+
+ public interface RepeatedListReader extends ListReader{
+ boolean next();
+ int size();
+ void copyAsValue(ListWriter writer);
+ }
+
+ public interface MapReader extends BaseReader{
+ FieldReader reader();
+ }
+
+ public interface RepeatedMapReader extends MapReader{
+ boolean next();
+ int size();
+ void copyAsValue(MapWriter writer);
+ }
+
+ public interface ScalarReader extends
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, </#list></#list>
+ BaseReader {}
+
+ interface ComplexReader{
+ StructReader rootAsStruct();
+ ListReader rootAsList();
+ boolean rootIsStruct();
+ boolean ok();
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/BaseWriter.java b/src/arrow/java/vector/src/main/codegen/templates/BaseWriter.java
new file mode 100644
index 000000000..4d63fb73e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/BaseWriter.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/writer/BaseWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.writer;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * File generated from ${.template_name} using FreeMarker.
+ */
+@SuppressWarnings("unused")
+public interface BaseWriter extends AutoCloseable, Positionable {
+ int getValueCapacity();
+ void writeNull();
+
+ public interface StructWriter extends BaseWriter {
+
+ Field getField();
+
+ /**
+ * Whether this writer is a struct writer and is empty (has no children).
+ *
+ * <p>
+ * Intended only for use in determining whether to add dummy vector to
+ * avoid empty (zero-column) schema, as in JsonReader.
+ * </p>
+ * @return whether the struct is empty
+ */
+ boolean isEmptyStruct();
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#if minor.typeParams?? >
+ ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>);
+ </#if>
+ ${capName}Writer ${lowerName}(String name);
+ </#list></#list>
+
+ void copyReaderToField(String name, FieldReader reader);
+ StructWriter struct(String name);
+ ListWriter list(String name);
+ MapWriter map(String name);
+ MapWriter map(String name, boolean keysSorted);
+ void start();
+ void end();
+ }
+
+ public interface ListWriter extends BaseWriter {
+ void startList();
+ void endList();
+ StructWriter struct();
+ ListWriter list();
+ MapWriter map();
+ MapWriter map(boolean keysSorted);
+ void copyReader(FieldReader reader);
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ ${capName}Writer ${lowerName}();
+ </#list></#list>
+ }
+
+ public interface MapWriter extends ListWriter {
+ void startMap();
+ void endMap();
+
+ void startEntry();
+ void endEntry();
+
+ MapWriter key();
+ MapWriter value();
+ }
+
+ public interface ScalarWriter extends
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Writer, </#list></#list> BaseWriter {}
+
+ public interface ComplexWriter {
+ void allocate();
+ void clear();
+ void copyReader(FieldReader reader);
+ StructWriter rootAsStruct();
+ ListWriter rootAsList();
+
+ void setPosition(int index);
+ void setValueCount(int count);
+ void reset();
+ }
+
+ public interface StructOrListWriter {
+ void start();
+ void end();
+ StructOrListWriter struct(String name);
+ StructOrListWriter listoftstruct(String name);
+ StructOrListWriter list(String name);
+ boolean isStructWriter();
+ boolean isListWriter();
+ VarCharWriter varChar(String name);
+ IntWriter integer(String name);
+ BigIntWriter bigInt(String name);
+ Float4Writer float4(String name);
+ Float8Writer float8(String name);
+ BitWriter bit(String name);
+ VarBinaryWriter binary(String name);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java b/src/arrow/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java
new file mode 100644
index 000000000..cc0dd7b33
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<#list ["Nullable", "Single"] as mode>
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${mode}CaseSensitiveStructWriter.java" />
+<#assign index = "idx()">
+<#if mode == "Single">
+<#assign containerClass = "NonNullableStructVector" />
+<#else>
+<#assign containerClass = "StructVector" />
+</#if>
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+/*
+ * This class is generated using FreeMarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class ${mode}CaseSensitiveStructWriter extends ${mode}StructWriter {
+ public ${mode}CaseSensitiveStructWriter(${containerClass} container) {
+ super(container);
+ }
+
+ @Override
+ protected String handleCase(final String input){
+ return input;
+ }
+
+ @Override
+ protected NullableStructWriterFactory getNullableStructWriterFactory() {
+ return NullableStructWriterFactory.getNullableCaseSensitiveStructWriterFactoryInstance();
+ }
+
+}
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ComplexCopier.java b/src/arrow/java/vector/src/main/codegen/templates/ComplexCopier.java
new file mode 100644
index 000000000..39a84041e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ComplexCopier.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionMapWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.types.Types;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/ComplexCopier.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class ComplexCopier {
+
+ /**
+ * Do a deep copy of the value in input into output
+ * @param input field to read from
+ * @param output field to write to
+ */
+ public static void copy(FieldReader input, FieldWriter output) {
+ writeValue(input, output);
+ }
+
+ private static void writeValue(FieldReader reader, FieldWriter writer) {
+ final MinorType mt = reader.getMinorType();
+
+ switch (mt) {
+
+ case LIST:
+ case LARGELIST:
+ case FIXED_SIZE_LIST:
+ if (reader.isSet()) {
+ writer.startList();
+ while (reader.next()) {
+ FieldReader childReader = reader.reader();
+ FieldWriter childWriter = getListWriterForReader(childReader, writer);
+ if (childReader.isSet()) {
+ writeValue(childReader, childWriter);
+ } else {
+ childWriter.writeNull();
+ }
+ }
+ writer.endList();
+ } else {
+ writer.writeNull();
+ }
+ break;
+ case MAP:
+ if (reader.isSet()) {
+ UnionMapWriter mapWriter = (UnionMapWriter) writer;
+ UnionMapReader mapReader = (UnionMapReader) reader;
+
+ mapWriter.startMap();
+ while (mapReader.next()) {
+ FieldReader structReader = reader.reader();
+ UnionMapWriter structWriter = (UnionMapWriter) writer.struct();
+ if (structReader.isSet()) {
+ mapWriter.startEntry();
+ writeValue(mapReader.key(), getStructWriterForReader(mapReader.key(), structWriter.key(), MapVector.KEY_NAME));
+ writeValue(mapReader.value(), getStructWriterForReader(mapReader.value(), structWriter.value(), MapVector.VALUE_NAME));
+ mapWriter.endEntry();
+ } else {
+ structWriter.writeNull();
+ }
+ }
+ mapWriter.endMap();
+ } else {
+ writer.writeNull();
+ }
+ break;
+ case STRUCT:
+ if (reader.isSet()) {
+ writer.start();
+ for(String name : reader){
+ FieldReader childReader = reader.reader(name);
+ if (childReader.getMinorType() != Types.MinorType.NULL) {
+ FieldWriter childWriter = getStructWriterForReader(childReader, writer, name);
+ if (childReader.isSet()) {
+ writeValue(childReader, childWriter);
+ } else {
+ childWriter.writeNull();
+ }
+ }
+ }
+ writer.end();
+ } else {
+ writer.writeNull();
+ }
+ break;
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+
+ case ${name?upper_case}:
+ if (reader.isSet()) {
+ Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder();
+ reader.read(${uncappedName}Holder);
+ if (${uncappedName}Holder.isSet == 1) {
+ writer.write${name}(<#list fields as field>${uncappedName}Holder.${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, new ArrowType.Decimal(${uncappedName}Holder.precision, ${uncappedName}Holder.scale, ${name}Holder.WIDTH * 8)</#if>);
+ }
+ } else {
+ writer.writeNull();
+ }
+ break;
+
+ </#if>
+ </#list></#list>
+ }
+ }
+
+ private static FieldWriter getStructWriterForReader(FieldReader reader, StructWriter writer, String name) {
+ switch (reader.getMinorType()) {
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams??>
+ case ${name?upper_case}:
+ return (FieldWriter) writer.<#if name == "Int">integer<#else>${uncappedName}</#if>(name);
+ </#if>
+ <#if minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ if (reader.getField().getType() instanceof ArrowType.Decimal) {
+ ArrowType.Decimal type = (ArrowType.Decimal) reader.getField().getType();
+ return (FieldWriter) writer.${uncappedName}(name, type.getScale(), type.getPrecision());
+ } else {
+ return (FieldWriter) writer.${uncappedName}(name);
+ }
+ </#if>
+
+ </#list></#list>
+ case STRUCT:
+ return (FieldWriter) writer.struct(name);
+ case FIXED_SIZE_LIST:
+ case LIST:
+ case MAP:
+ return (FieldWriter) writer.list(name);
+ default:
+ throw new UnsupportedOperationException(reader.getMinorType().toString());
+ }
+ }
+
+ private static FieldWriter getListWriterForReader(FieldReader reader, ListWriter writer) {
+ switch (reader.getMinorType()) {
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ case ${name?upper_case}:
+ return (FieldWriter) writer.<#if name == "Int">integer<#else>${uncappedName}</#if>();
+ </#if>
+ </#list></#list>
+ case STRUCT:
+ return (FieldWriter) writer.struct();
+ case FIXED_SIZE_LIST:
+ case LIST:
+ case MAP:
+ case NULL:
+ return (FieldWriter) writer.list();
+ default:
+ throw new UnsupportedOperationException(reader.getMinorType().toString());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ComplexReaders.java b/src/arrow/java/vector/src/main/codegen/templates/ComplexReaders.java
new file mode 100644
index 000000000..48fb6603a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ComplexReaders.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.Override;
+import java.util.List;
+
+import org.apache.arrow.record.TransferPair;
+import org.apache.arrow.vector.complex.IndexHolder;
+import org.apache.arrow.vector.complex.writer.IntervalWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+
+<@pp.dropOutputFile />
+<#list vv.types as type>
+<#list type.minor as minor>
+<#list [""] as mode>
+<#assign lowerName = minor.class?uncap_first />
+<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+<#assign name = minor.class?cap_first />
+<#assign javaType = (minor.javaType!type.javaType) />
+<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+<#assign safeType=friendlyType />
+<#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+
+<#assign hasFriendly = minor.friendlyType!"no" == "no" />
+
+<#list ["Nullable"] as nullMode>
+<#if mode == "" >
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${name}ReaderImpl.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public class ${name}ReaderImpl extends AbstractFieldReader {
+
+ private final ${name}Vector vector;
+
+ public ${name}ReaderImpl(${name}Vector vector){
+ super();
+ this.vector = vector;
+ }
+
+ public MinorType getMinorType(){
+ return vector.getMinorType();
+ }
+
+ public Field getField(){
+ return vector.getField();
+ }
+
+ public boolean isSet(){
+ return !vector.isNull(idx());
+ }
+
+ public void copyAsValue(${minor.class?cap_first}Writer writer){
+ ${minor.class?cap_first}WriterImpl impl = (${minor.class?cap_first}WriterImpl) writer;
+ impl.vector.copyFromSafe(idx(), impl.idx(), vector);
+ }
+
+ public void copyAsField(String name, StructWriter writer){
+ ${minor.class?cap_first}WriterImpl impl = (${minor.class?cap_first}WriterImpl) writer.${lowerName}(name);
+ impl.vector.copyFromSafe(idx(), impl.idx(), vector);
+ }
+
+ <#if nullMode != "Nullable">
+ public void read(${minor.class?cap_first}Holder h){
+ vector.get(idx(), h);
+ }
+ </#if>
+
+ public void read(Nullable${minor.class?cap_first}Holder h){
+ vector.get(idx(), h);
+ }
+
+ public ${friendlyType} read${safeType}(){
+ return vector.getObject(idx());
+ }
+
+ <#if minor.class == "TimeStampSec" ||
+ minor.class == "TimeStampMilli" ||
+ minor.class == "TimeStampMicro" ||
+ minor.class == "TimeStampNano">
+ @Override
+ public ${minor.boxedType} read${minor.boxedType}(){
+ return vector.get(idx());
+ }
+ </#if>
+
+ public void copyValue(FieldWriter w){
+
+ }
+
+ public Object readObject(){
+ return (Object)vector.getObject(idx());
+ }
+}
+</#if>
+</#list>
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/reader/${name}Reader.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.reader;
+
+<#include "/@includes/vv_imports.ftl" />
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public interface ${name}Reader extends BaseReader{
+
+ public void read(${minor.class?cap_first}Holder h);
+ public void read(Nullable${minor.class?cap_first}Holder h);
+ public Object readObject();
+ // read friendly type
+ public ${friendlyType} read${safeType}();
+ public boolean isSet();
+ public void copyAsValue(${minor.class}Writer writer);
+ public void copyAsField(String name, ${minor.class}Writer writer);
+
+}
+
+
+
+</#list>
+</#list>
+</#list>
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ComplexWriters.java b/src/arrow/java/vector/src/main/codegen/templates/ComplexWriters.java
new file mode 100644
index 000000000..0381e5559
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ComplexWriters.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<#list vv.types as type>
+<#list type.minor as minor>
+<#list ["Nullable"] as mode>
+<#assign name = minor.class?cap_first />
+<#assign eName = name />
+<#assign javaType = (minor.javaType!type.javaType) />
+<#assign fields = minor.fields!type.fields />
+<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${eName}WriterImpl.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using FreeMarker on the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class ${eName}WriterImpl extends AbstractFieldWriter {
+
+ final ${name}Vector vector;
+
+ public ${eName}WriterImpl(${name}Vector vector) {
+ this.vector = vector;
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return vector.getValueCapacity();
+ }
+
+ @Override
+ public void allocate() {
+ vector.allocateNew();
+ }
+
+ @Override
+ public void close() {
+ vector.close();
+ }
+
+ @Override
+ public void clear() {
+ vector.clear();
+ }
+
+ @Override
+ protected int idx() {
+ return super.idx();
+ }
+
+ <#if mode == "Repeated">
+
+ public void write(${minor.class?cap_first}Holder h) {
+ mutator.addSafe(idx(), h);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void write(${minor.class?cap_first}Holder h) {
+ mutator.addSafe(idx(), h);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ mutator.addSafe(idx(), <#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void setPosition(int idx) {
+ super.setPosition(idx);
+ mutator.startNewValue(idx);
+ }
+
+
+ <#else>
+
+ <#if !minor.class?starts_with("Decimal")>
+ public void write(${minor.class}Holder h) {
+ vector.setSafe(idx(), h);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void write(Nullable${minor.class}Holder h) {
+ vector.setSafe(idx(), h);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ vector.setSafe(idx(), 1<#list fields as field><#if field.include!true >, ${field.name}</#if></#list>);
+ vector.setValueCount(idx()+1);
+ }
+ </#if>
+
+ <#if minor.class == "VarChar">
+ public void write${minor.class}(${friendlyType} value) {
+ vector.setSafe(idx(), value);
+ vector.setValueCount(idx()+1);
+ }
+ </#if>
+
+ <#if minor.class?starts_with("Decimal")>
+
+ public void write(${minor.class}Holder h){
+ DecimalUtility.checkPrecisionAndScale(h.precision, h.scale, vector.getPrecision(), vector.getScale());
+ vector.setSafe(idx(), h);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void write(Nullable${minor.class}Holder h){
+ if (h.isSet == 1) {
+ DecimalUtility.checkPrecisionAndScale(h.precision, h.scale, vector.getPrecision(), vector.getScale());
+ }
+ vector.setSafe(idx(), h);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void write${minor.class}(long start, ArrowBuf buffer){
+ vector.setSafe(idx(), 1, start, buffer);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void write${minor.class}(long start, ArrowBuf buffer, ArrowType arrowType){
+ DecimalUtility.checkPrecisionAndScale(((ArrowType.Decimal) arrowType).getPrecision(),
+ ((ArrowType.Decimal) arrowType).getScale(), vector.getPrecision(), vector.getScale());
+ vector.setSafe(idx(), 1, start, buffer);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void write${minor.class}(BigDecimal value){
+ // vector.setSafe already does precision and scale checking
+ vector.setSafe(idx(), value);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType){
+ DecimalUtility.checkPrecisionAndScale(((ArrowType.Decimal) arrowType).getPrecision(),
+ ((ArrowType.Decimal) arrowType).getScale(), vector.getPrecision(), vector.getScale());
+ vector.setBigEndianSafe(idx(), value);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value){
+ vector.setBigEndianSafe(idx(), value);
+ vector.setValueCount(idx() + 1);
+ }
+ </#if>
+
+
+ public void writeNull() {
+ vector.setNull(idx());
+ vector.setValueCount(idx()+1);
+ }
+ </#if>
+}
+
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/writer/${eName}Writer.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.writer;
+
+<#include "/@includes/vv_imports.ftl" />
+/*
+ * This class is generated using FreeMarker on the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public interface ${eName}Writer extends BaseWriter {
+ public void write(${minor.class}Holder h);
+
+ <#if minor.class?starts_with("Decimal")>@Deprecated</#if>
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>);
+<#if minor.class?starts_with("Decimal")>
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>, ArrowType arrowType);
+
+ public void write${minor.class}(${friendlyType} value);
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType);
+
+ @Deprecated
+ public void writeBigEndianBytesTo${minor.class}(byte[] value);
+</#if>
+}
+
+</#list>
+</#list>
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/DenseUnionReader.java b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionReader.java
new file mode 100644
index 000000000..a085e03ea
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionReader.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.arrow.vector.complex.impl.UnionListReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/DenseUnionReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+ package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public class DenseUnionReader extends AbstractFieldReader {
+
+ private BaseReader[] readers = new BaseReader[Byte.MAX_VALUE + 1];
+ public DenseUnionVector data;
+
+ public DenseUnionReader(DenseUnionVector data) {
+ this.data = data;
+ }
+
+ public MinorType getMinorType() {
+ byte typeId = data.getTypeId(idx());
+ return data.getVectorByType(typeId).getMinorType();
+ }
+
+ public byte getTypeId() {
+ return data.getTypeId(idx());
+ }
+
+ @Override
+ public Field getField() {
+ return data.getField();
+ }
+
+ public boolean isSet(){
+ return !data.isNull(idx());
+ }
+
+ public void read(DenseUnionHolder holder) {
+ holder.reader = this;
+ holder.isSet = this.isSet() ? 1 : 0;
+ holder.typeId = getTypeId();
+ }
+
+ public void read(int index, UnionHolder holder) {
+ byte typeId = data.getTypeId(index);
+ getList(typeId).read(index, holder);
+ }
+
+ private FieldReader getReaderForIndex(int index) {
+ byte typeId = data.getTypeId(index);
+ MinorType minorType = data.getVectorByType(typeId).getMinorType();
+ FieldReader reader = (FieldReader) readers[typeId];
+ if (reader != null) {
+ return reader;
+ }
+ switch (minorType) {
+ case NULL:
+ reader = NullReader.INSTANCE;
+ break;
+ case STRUCT:
+ reader = (FieldReader) getStruct(typeId);
+ break;
+ case LIST:
+ reader = (FieldReader) getList(typeId);
+ break;
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ reader = (FieldReader) get${name}(typeId);
+ break;
+ </#if>
+ </#list>
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unsupported type: " + MinorType.values()[typeId]);
+ }
+ return reader;
+ }
+
+ private SingleStructReaderImpl structReader;
+
+ private StructReader getStruct(byte typeId) {
+ StructReader structReader = (StructReader) readers[typeId];
+ if (structReader == null) {
+ structReader = (SingleStructReaderImpl) data.getVectorByType(typeId).getReader();
+ structReader.setPosition(idx());
+ readers[typeId] = structReader;
+ }
+ return structReader;
+ }
+
+ private UnionListReader listReader;
+
+ private FieldReader getList(byte typeId) {
+ UnionListReader listReader = (UnionListReader) readers[typeId];
+ if (listReader == null) {
+ listReader = new UnionListReader((ListVector) data.getVectorByType(typeId));
+ listReader.setPosition(idx());
+ readers[typeId] = listReader;
+ }
+ return listReader;
+ }
+
+ private UnionMapReader mapReader;
+
+ private FieldReader getMap(byte typeId) {
+ UnionMapReader mapReader = (UnionMapReader) readers[typeId];
+ if (mapReader == null) {
+ mapReader = new UnionMapReader((MapVector) data.getVectorByType(typeId));
+ mapReader.setPosition(idx());
+ readers[typeId] = mapReader;
+ }
+ return mapReader;
+ }
+
+ @Override
+ public java.util.Iterator<String> iterator() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void copyAsValue(UnionWriter writer) {
+ writer.data.copyFrom(idx(), writer.idx(), data);
+ }
+
+ <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean",
+ "LocalDateTime", "Duration", "Period", "Double", "Float",
+ "Character", "Text", "Byte", "byte[]", "PeriodDuration"] as friendlyType>
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+
+ @Override
+ public ${friendlyType} read${safeType}() {
+ return getReaderForIndex(idx()).read${safeType}();
+ }
+
+ </#list>
+
+ public int size() {
+ return getReaderForIndex(idx()).size();
+ }
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign boxedType = (minor.boxedType!type.boxedType) />
+ <#assign javaType = (minor.javaType!type.javaType) />
+ <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+
+ private ${name}ReaderImpl get${name}(byte typeId) {
+ ${name}ReaderImpl reader = (${name}ReaderImpl) readers[typeId];
+ if (reader == null) {
+ reader = new ${name}ReaderImpl((${name}Vector) data.getVectorByType(typeId));
+ reader.setPosition(idx());
+ readers[typeId] = reader;
+ }
+ return reader;
+ }
+
+ public void read(Nullable${name}Holder holder){
+ getReaderForIndex(idx()).read(holder);
+ }
+
+ public void copyAsValue(${name}Writer writer){
+ getReaderForIndex(idx()).copyAsValue(writer);
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ @Override
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ byte typeId = data.getTypeId(index);
+ if (readers[typeId] != null) {
+ int offset = data.getOffset(index);
+ readers[typeId].setPosition(offset);
+ }
+ }
+
+ public FieldReader reader(byte typeId, String name){
+ return getStruct(typeId).reader(name);
+ }
+
+ public FieldReader reader(byte typeId) {
+ return getList(typeId).reader();
+ }
+
+ public boolean next() {
+ return getReaderForIndex(idx()).next();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/DenseUnionVector.java b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionVector.java
new file mode 100644
index 000000000..63f4f5876
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionVector.java
@@ -0,0 +1,943 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.AbstractStructVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+import org.apache.arrow.vector.util.TransferPair;
+
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/DenseUnionVector.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex;
+
+<#include "/@includes/vv_imports.ftl" />
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.util.Preconditions;
+
+import static org.apache.arrow.vector.types.UnionMode.Dense;
+
+
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+
+
+/**
+ * A vector which can hold values of different types. It does so by using a StructVector which contains a vector for each
+ * primitive type that is stored. StructVector is used in order to take advantage of its serialization/deserialization methods,
+ * as well as the addOrGet method.
+ *
+ * For performance reasons, DenseUnionVector stores a cached reference to each subtype vector, to avoid having to do the struct lookup
+ * each time the vector is accessed.
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+public class DenseUnionVector extends AbstractContainerVector implements FieldVector {
+ int valueCount;
+
+ NonNullableStructVector internalStruct;
+ private ArrowBuf typeBuffer;
+ private ArrowBuf offsetBuffer;
+
+ /**
+ * The key is type Id, and the value is vector.
+ */
+ private ValueVector[] childVectors = new ValueVector[Byte.MAX_VALUE + 1];
+
+ /**
+ * The index is the type id, and the value is the type field.
+ */
+ private Field[] typeFields = new Field[Byte.MAX_VALUE + 1];
+ /**
+ * The index is the index into the typeFields array, and the value is the logical field id.
+ */
+ private byte[] typeMapFields = new byte[Byte.MAX_VALUE + 1];
+
+ /**
+ * The next type id to allocate.
+ */
+ private byte nextTypeId = 0;
+
+ private FieldReader reader;
+
+ private long typeBufferAllocationSizeInBytes;
+ private long offsetBufferAllocationSizeInBytes;
+
+ private final FieldType fieldType;
+
+ public static final byte TYPE_WIDTH = 1;
+ public static final byte OFFSET_WIDTH = 4;
+
+ private static final FieldType INTERNAL_STRUCT_TYPE = new FieldType(/*nullable*/ false,
+ ArrowType.Struct.INSTANCE, /*dictionary*/ null, /*metadata*/ null);
+
+ public static DenseUnionVector empty(String name, BufferAllocator allocator) {
+ FieldType fieldType = FieldType.nullable(new ArrowType.Union(
+ UnionMode.Dense, null));
+ return new DenseUnionVector(name, allocator, fieldType, null);
+ }
+
+ public DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(name, allocator, callBack);
+ this.fieldType = fieldType;
+ this.internalStruct = new NonNullableStructVector(
+ "internal",
+ allocator,
+ INTERNAL_STRUCT_TYPE,
+ callBack,
+ AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE,
+ false);
+ this.typeBuffer = allocator.getEmpty();
+ this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH;
+ this.offsetBuffer = allocator.getEmpty();
+ this.offsetBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH;
+ }
+
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DENSEUNION;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ for (Field field : children) {
+ byte typeId = registerNewTypeId(field);
+ FieldVector vector = (FieldVector) internalStruct.add(field.getName(), field.getFieldType());
+ vector.initializeChildrenFromFields(field.getChildren());
+ childVectors[typeId] = vector;
+ }
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return internalStruct.getChildrenFromFields();
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 2) {
+ throw new IllegalArgumentException("Illegal buffer count for dense union with type " + getField().getFieldType() +
+ ", expected " + 2 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf buffer = ownBuffers.get(0);
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = buffer.getReferenceManager().retain(buffer, allocator);
+ typeBufferAllocationSizeInBytes = typeBuffer.capacity();
+
+ buffer = ownBuffers.get(1);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = buffer.getReferenceManager().retain(buffer, allocator);
+ offsetBufferAllocationSizeInBytes = offsetBuffer.capacity();
+
+ this.valueCount = fieldNode.getLength();
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(2);
+ setReaderAndWriterIndex();
+ result.add(typeBuffer);
+ result.add(offsetBuffer);
+
+ return result;
+ }
+
+ private void setReaderAndWriterIndex() {
+ typeBuffer.readerIndex(0);
+ typeBuffer.writerIndex(valueCount * TYPE_WIDTH);
+
+ offsetBuffer.readerIndex(0);
+ offsetBuffer.writerIndex((long) valueCount * OFFSET_WIDTH);
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Override
+ @Deprecated
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use geFieldBuffers");
+ }
+
+ private String fieldName(byte typeId, MinorType type) {
+ return type.name().toLowerCase() + typeId;
+ }
+
+ private FieldType fieldType(MinorType type) {
+ return FieldType.nullable(type.getType());
+ }
+
+ public synchronized byte registerNewTypeId(Field field) {
+ if (nextTypeId == typeFields.length) {
+ throw new IllegalStateException("Dense union vector support at most " +
+ typeFields.length + " relative types. Please use union of union instead");
+ }
+ byte typeId = nextTypeId;
+ if (fieldType != null) {
+ int[] typeIds = ((ArrowType.Union) fieldType.getType()).getTypeIds();
+ if (typeIds != null) {
+ int thisTypeId = typeIds[nextTypeId];
+ if (thisTypeId > Byte.MAX_VALUE) {
+ throw new IllegalStateException("Dense union vector types must be bytes. " + thisTypeId + " is too large");
+ }
+ typeId = (byte) thisTypeId;
+ }
+ }
+ typeFields[typeId] = field;
+ typeMapFields[nextTypeId] = typeId;
+ this.nextTypeId += 1;
+ return typeId;
+ }
+
+ private <T extends FieldVector> T addOrGet(byte typeId, MinorType minorType, Class<T> c) {
+ return internalStruct.addOrGet(fieldName(typeId, minorType), fieldType(minorType), c);
+ }
+
+ private <T extends FieldVector> T addOrGet(byte typeId, MinorType minorType, ArrowType arrowType, Class<T> c) {
+ return internalStruct.addOrGet(fieldName(typeId, minorType), FieldType.nullable(arrowType), c);
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ return offsetBuffer.memoryAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() { throw new UnsupportedOperationException(); }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() { return offsetBuffer; }
+
+ public ArrowBuf getTypeBuffer() { return typeBuffer; }
+
+ @Override
+ public ArrowBuf getDataBuffer() { throw new UnsupportedOperationException(); }
+
+ public StructVector getStruct(byte typeId) {
+ StructVector structVector = typeId < 0 ? null : (StructVector) childVectors[typeId];
+ if (structVector == null) {
+ int vectorCount = internalStruct.size();
+ structVector = addOrGet(typeId, MinorType.STRUCT, StructVector.class);
+ if (internalStruct.size() > vectorCount) {
+ structVector.allocateNew();
+ childVectors[typeId] = structVector;
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return structVector;
+ }
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign lowerCaseName = name?lower_case/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+
+ public ${name}Vector get${name}Vector(byte typeId<#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ ValueVector vector = typeId < 0 ? null : childVectors[typeId];
+ if (vector == null) {
+ int vectorCount = internalStruct.size();
+ vector = addOrGet(typeId, MinorType.${name?upper_case}<#if minor.class?starts_with("Decimal")>, arrowType</#if>, ${name}Vector.class);
+ childVectors[typeId] = vector;
+ if (internalStruct.size() > vectorCount) {
+ vector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return (${name}Vector) vector;
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ public ListVector getList(byte typeId) {
+ ListVector listVector = typeId < 0 ? null : (ListVector) childVectors[typeId];
+ if (listVector == null) {
+ int vectorCount = internalStruct.size();
+ listVector = addOrGet(typeId, MinorType.LIST, ListVector.class);
+ if (internalStruct.size() > vectorCount) {
+ listVector.allocateNew();
+ childVectors[typeId] = listVector;
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return listVector;
+ }
+
+ public MapVector getMap(byte typeId) {
+ MapVector mapVector = typeId < 0 ? null : (MapVector) childVectors[typeId];
+ if (mapVector == null) {
+ int vectorCount = internalStruct.size();
+ mapVector = addOrGet(typeId, MinorType.MAP, MapVector.class);
+ if (internalStruct.size() > vectorCount) {
+ mapVector.allocateNew();
+ childVectors[typeId] = mapVector;
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return mapVector;
+ }
+
+ public byte getTypeId(int index) {
+ return typeBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ public ValueVector getVectorByType(byte typeId) {
+ return typeId < 0 ? null : childVectors[typeId];
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ /* new allocation -- clear the current buffers */
+ clear();
+ internalStruct.allocateNew();
+ try {
+ allocateTypeBuffer();
+ allocateOffsetBuffer();
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* new allocation -- clear the current buffers */
+ clear();
+ boolean safe = internalStruct.allocateNewSafe();
+ if (!safe) { return false; }
+ try {
+ allocateTypeBuffer();
+ allocateOffsetBuffer();
+ } catch (Exception e) {
+ clear();
+ return false;
+ }
+
+ return true;
+ }
+
+ private void allocateTypeBuffer() {
+ typeBuffer = allocator.buffer(typeBufferAllocationSizeInBytes);
+ typeBuffer.readerIndex(0);
+ setNegative(0, typeBuffer.capacity());
+ }
+
+ private void allocateOffsetBuffer() {
+ offsetBuffer = allocator.buffer(offsetBufferAllocationSizeInBytes);
+ offsetBuffer.readerIndex(0);
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+
+ @Override
+ public void reAlloc() {
+ internalStruct.reAlloc();
+ reallocTypeBuffer();
+ reallocOffsetBuffer();
+ }
+
+ public int getOffset(int index) {
+ return offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ }
+
+ private void reallocTypeBuffer() {
+ final long currentBufferCapacity = typeBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (typeBufferAllocationSizeInBytes > 0) {
+ newAllocationSize = typeBufferAllocationSizeInBytes;
+ } else {
+ newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int)newAllocationSize);
+ newBuf.setBytes(0, typeBuffer, 0, currentBufferCapacity);
+ typeBuffer.getReferenceManager().release(1);
+ typeBuffer = newBuf;
+ typeBufferAllocationSizeInBytes = (int)newAllocationSize;
+ setNegative(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ }
+
+ private void reallocOffsetBuffer() {
+ final long currentBufferCapacity = offsetBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (offsetBufferAllocationSizeInBytes > 0) {
+ newAllocationSize = offsetBufferAllocationSizeInBytes;
+ } else {
+ newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ offsetBuffer.getReferenceManager().release(1);
+ offsetBuffer = newBuf;
+ offsetBufferAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) { }
+
+ @Override
+ public int getValueCapacity() {
+ long capacity = getTypeBufferValueCapacity();
+ long offsetCapacity = getOffsetBufferValueCapacity();
+ if (offsetCapacity < capacity) {
+ capacity = offsetCapacity;
+ }
+ long structCapacity = internalStruct.getValueCapacity();
+ if (structCapacity < capacity) {
+ structCapacity = capacity;
+ }
+ return (int) capacity;
+ }
+
+ @Override
+ public void close() {
+ clear();
+ }
+
+ @Override
+ public void clear() {
+ valueCount = 0;
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = allocator.getEmpty();
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = allocator.getEmpty();
+ internalStruct.clear();
+ }
+
+ @Override
+ public void reset() {
+ valueCount = 0;
+ setNegative(0, typeBuffer.capacity());
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ internalStruct.reset();
+ }
+
+ @Override
+ public Field getField() {
+ int childCount = (int) Arrays.stream(typeFields).filter(field -> field != null).count();
+ List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>(childCount);
+ int[] typeIds = new int[childCount];
+ for (int i = 0; i < typeFields.length; i++) {
+ if (typeFields[i] != null) {
+ int curIdx = childFields.size();
+ typeIds[curIdx] = i;
+ childFields.add(typeFields[i]);
+ }
+ }
+
+ FieldType fieldType;
+ if (this.fieldType == null) {
+ fieldType = FieldType.nullable(new ArrowType.Union(Dense, typeIds));
+ } else {
+ final UnionMode mode = UnionMode.Dense;
+ fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds),
+ this.fieldType.getDictionary(), this.fieldType.getMetadata());
+ }
+
+ return new Field(name, fieldType, childFields);
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(name, allocator);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new org.apache.arrow.vector.complex.DenseUnionVector.TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((DenseUnionVector) target);
+ }
+
+ @Override
+ public void copyFrom(int inIndex, int outIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ DenseUnionVector fromCast = (DenseUnionVector) from;
+ int inOffset = fromCast.offsetBuffer.getInt((long) inIndex * OFFSET_WIDTH);
+ fromCast.getReader().setPosition(inOffset);
+ int outOffset = offsetBuffer.getInt((long) outIndex * OFFSET_WIDTH);
+ getWriter().setPosition(outOffset);
+ ComplexCopier.copy(fromCast.reader, writer);
+ }
+
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ public FieldVector addVector(byte typeId, FieldVector v) {
+ final String name = v.getName().isEmpty() ? fieldName(typeId, v.getMinorType()) : v.getName();
+ Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name));
+ final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass());
+ v.makeTransferPair(newVector).transfer();
+ internalStruct.putChild(name, newVector);
+ childVectors[typeId] = newVector;
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return newVector;
+ }
+
+ private class TransferImpl implements TransferPair {
+ private final TransferPair[] internalTransferPairs = new TransferPair[nextTypeId];
+ private final DenseUnionVector to;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ to = new DenseUnionVector(name, allocator, null, callBack);
+ internalStruct.makeTransferPair(to.internalStruct);
+ createTransferPairs();
+ }
+
+ public TransferImpl(DenseUnionVector to) {
+ this.to = to;
+ internalStruct.makeTransferPair(to.internalStruct);
+ createTransferPairs();
+ }
+
+ private void createTransferPairs() {
+ for (int i = 0; i < nextTypeId; i++) {
+ ValueVector srcVec = internalStruct.getVectorById(i);
+ ValueVector dstVec = to.internalStruct.getVectorById(i);
+ to.typeFields[i] = typeFields[i];
+ to.typeMapFields[i] = typeMapFields[i];
+ to.childVectors[i] = dstVec;
+ internalTransferPairs[i] = srcVec.makeTransferPair(dstVec);
+ }
+ }
+
+ @Override
+ public void transfer() {
+ to.clear();
+
+ ReferenceManager refManager = typeBuffer.getReferenceManager();
+ to.typeBuffer = refManager.transferOwnership(typeBuffer, to.allocator).getTransferredBuffer();
+
+ refManager = offsetBuffer.getReferenceManager();
+ to.offsetBuffer = refManager.transferOwnership(offsetBuffer, to.allocator).getTransferredBuffer();
+
+ for (int i = 0; i < nextTypeId; i++) {
+ if (internalTransferPairs[i] != null) {
+ internalTransferPairs[i].transfer();
+ to.childVectors[i] = internalTransferPairs[i].getTo();
+ }
+ }
+ to.valueCount = valueCount;
+ clear();
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ to.clear();
+
+ // transfer type buffer
+ int startPoint = startIndex * TYPE_WIDTH;
+ int sliceLength = length * TYPE_WIDTH;
+ ArrowBuf slicedBuffer = typeBuffer.slice(startPoint, sliceLength);
+ ReferenceManager refManager = slicedBuffer.getReferenceManager();
+ to.typeBuffer = refManager.transferOwnership(slicedBuffer, to.allocator).getTransferredBuffer();
+
+ // transfer offset byffer
+ while (to.offsetBuffer.capacity() < (long) length * OFFSET_WIDTH) {
+ to.reallocOffsetBuffer();
+ }
+
+ int [] typeCounts = new int[nextTypeId];
+ int [] typeStarts = new int[nextTypeId];
+ for (int i = 0; i < typeCounts.length; i++) {
+ typeCounts[i] = 0;
+ typeStarts[i] = -1;
+ }
+
+ for (int i = startIndex; i < startIndex + length; i++) {
+ byte typeId = typeBuffer.getByte(i);
+ to.offsetBuffer.setInt((long) (i - startIndex) * OFFSET_WIDTH, typeCounts[typeId]);
+ typeCounts[typeId] += 1;
+ if (typeStarts[typeId] == -1) {
+ typeStarts[typeId] = offsetBuffer.getInt((long) i * OFFSET_WIDTH);
+ }
+ }
+
+ // transfer vector values
+ for (int i = 0; i < nextTypeId; i++) {
+ if (typeCounts[i] > 0 && typeStarts[i] != -1) {
+ internalTransferPairs[i].splitAndTransfer(typeStarts[i], typeCounts[i]);
+ to.childVectors[i] = internalTransferPairs[i].getTo();
+ }
+ }
+
+ to.setValueCount(length);
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ this.to.copyFrom(from, to, DenseUnionVector.this);
+ }
+ }
+
+ @Override
+ public FieldReader getReader() {
+ if (reader == null) {
+ reader = new DenseUnionReader(this);
+ }
+ return reader;
+ }
+
+ public FieldWriter getWriter() {
+ if (writer == null) {
+ writer = new DenseUnionWriter(this);
+ }
+ return writer;
+ }
+
+ @Override
+ public int getBufferSize() {
+ return this.getBufferSizeFor(this.valueCount);
+ }
+
+ @Override
+ public int getBufferSizeFor(final int count) {
+ if (count == 0) {
+ return 0;
+ }
+ return (int) (count * TYPE_WIDTH + (long) count * OFFSET_WIDTH
+ + DataSizeRoundingUtil.divideBy8Ceil(count) + internalStruct.getBufferSizeFor(count));
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ List<ArrowBuf> list = new java.util.ArrayList<>();
+ setReaderAndWriterIndex();
+ if (getBufferSize() != 0) {
+ list.add(typeBuffer);
+ list.add(offsetBuffer);
+ list.addAll(java.util.Arrays.asList(internalStruct.getBuffers(clear)));
+ }
+ if (clear) {
+ valueCount = 0;
+ typeBuffer.getReferenceManager().retain();
+ typeBuffer.close();
+ typeBuffer = allocator.getEmpty();
+ offsetBuffer.getReferenceManager().retain();
+ offsetBuffer.close();
+ offsetBuffer = allocator.getEmpty();
+ }
+ return list.toArray(new ArrowBuf[list.size()]);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return internalStruct.iterator();
+ }
+
+ private ValueVector getVector(int index) {
+ byte typeId = typeBuffer.getByte(index * TYPE_WIDTH);
+ return getVectorByType(typeId);
+ }
+
+ public Object getObject(int index) {
+ ValueVector vector = getVector(index);
+ if (vector != null) {
+ int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ return vector.isNull(offset) ? null : vector.getObject(offset);
+ }
+ return null;
+ }
+
+ public void get(int index, DenseUnionHolder holder) {
+ FieldReader reader = new DenseUnionReader(DenseUnionVector.this);
+ reader.setPosition(index);
+ holder.reader = reader;
+ }
+
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * IMPORTANT: Union types always return non null as there is no validity buffer.
+ *
+ * To check validity correctly you must check the underlying vector.
+ */
+ public boolean isNull(int index) {
+ return false;
+ }
+
+ @Override
+ public int getNullCount() {
+ return 0;
+ }
+
+ public int isSet(int index) {
+ return isNull(index) ? 0 : 1;
+ }
+
+ DenseUnionWriter writer;
+
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ while (valueCount > getTypeBufferValueCapacity()) {
+ reallocTypeBuffer();
+ reallocOffsetBuffer();
+ }
+ setChildVectorValueCounts();
+ }
+
+ private void setChildVectorValueCounts() {
+ int [] counts = new int[Byte.MAX_VALUE + 1];
+ for (int i = 0; i < this.valueCount; i++) {
+ byte typeId = getTypeId(i);
+ if (typeId != -1) {
+ counts[typeId] += 1;
+ }
+ }
+ for (int i = 0; i < nextTypeId; i++) {
+ childVectors[typeMapFields[i]].setValueCount(counts[typeMapFields[i]]);
+ }
+ }
+
+ public void setSafe(int index, DenseUnionHolder holder) {
+ FieldReader reader = holder.reader;
+ if (writer == null) {
+ writer = new DenseUnionWriter(DenseUnionVector.this);
+ }
+ int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ MinorType type = reader.getMinorType();
+ writer.setPosition(offset);
+ byte typeId = holder.typeId;
+ switch (type) {
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder();
+ reader.read(${uncappedName}Holder);
+ setSafe(index, ${uncappedName}Holder);
+ break;
+ </#if>
+ </#list>
+ </#list>
+ case STRUCT:
+ case LIST: {
+ setTypeId(index, typeId);
+ ComplexCopier.copy(reader, writer);
+ break;
+ }
+ default:
+ throw new UnsupportedOperationException();
+ }
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ public void setSafe(int index, Nullable${name}Holder holder) {
+ while (index >= getOffsetBufferValueCapacity()) {
+ reallocOffsetBuffer();
+ }
+ byte typeId = getTypeId(index);
+ ${name}Vector vector = get${name}Vector(typeId<#if minor.class?starts_with("Decimal")>, new ArrowType.Decimal(holder.precision, holder.scale, holder.WIDTH * 8)</#if>);
+ int offset = vector.getValueCount();
+ vector.setValueCount(offset + 1);
+ vector.setSafe(offset, holder);
+ offsetBuffer.setInt((long) index * OFFSET_WIDTH, offset);
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ public void setTypeId(int index, byte typeId) {
+ while (index >= getTypeBufferValueCapacity()) {
+ reallocTypeBuffer();
+ }
+ typeBuffer.setByte(index * TYPE_WIDTH , typeId);
+ }
+
+ private int getTypeBufferValueCapacity() {
+ return (int) typeBuffer.capacity() / TYPE_WIDTH;
+ }
+
+ private long getOffsetBufferValueCapacity() {
+ return offsetBuffer.capacity() / OFFSET_WIDTH;
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isNull(index)) {
+ return 0;
+ }
+ int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ return getVector(index).hashCode(offset, hasher);
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, SimpleHasher.INSTANCE);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ private void setNegative(long start, long end) {
+ for (long i = start;i < end; i++) {
+ typeBuffer.setByte(i, -1);
+ }
+ }
+
+ @Override
+ public <T extends FieldVector> T addOrGet(String name, FieldType fieldType, Class<T> clazz) {
+ return internalStruct.addOrGet(name, fieldType, clazz);
+ }
+
+ @Override
+ public <T extends FieldVector> T getChild(String name, Class<T> clazz) {
+ return internalStruct.getChild(name, clazz);
+ }
+
+ @Override
+ public VectorWithOrdinal getChildVectorWithOrdinal(String name) {
+ return internalStruct.getChildVectorWithOrdinal(name);
+ }
+
+ @Override
+ public int size() {
+ return internalStruct.size();
+ }
+
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ for (final ValueVector vector : internalStruct) {
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(valueCount, density);
+ } else {
+ vector.setInitialCapacity(valueCount);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/DenseUnionWriter.java b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionWriter.java
new file mode 100644
index 000000000..e69a62a9e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionWriter.java
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory;
+import org.apache.arrow.vector.types.Types;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/DenseUnionWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+ package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+ import org.apache.arrow.vector.complex.writer.BaseWriter;
+ import org.apache.arrow.vector.types.Types.MinorType;
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class DenseUnionWriter extends AbstractFieldWriter implements FieldWriter {
+
+ DenseUnionVector data;
+
+ private BaseWriter[] writers = new BaseWriter[Byte.MAX_VALUE + 1];
+ private final NullableStructWriterFactory nullableStructWriterFactory;
+
+ public DenseUnionWriter(DenseUnionVector vector) {
+ this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ public DenseUnionWriter(DenseUnionVector vector, NullableStructWriterFactory nullableStructWriterFactory) {
+ data = vector;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for (BaseWriter writer : writers) {
+ writer.setPosition(index);
+ }
+ }
+
+ @Override
+ public void start() {
+ byte typeId = data.getTypeId(idx());
+ getStructWriter((byte) idx()).start();
+ }
+
+ @Override
+ public void end() {
+ byte typeId = data.getTypeId(idx());
+ getStructWriter(typeId).end();
+ }
+
+ @Override
+ public void startList() {
+ byte typeId = data.getTypeId(idx());
+ getListWriter(typeId).startList();
+ }
+
+ @Override
+ public void endList() {
+ byte typeId = data.getTypeId(idx());
+ getListWriter(typeId).endList();
+ }
+
+ private StructWriter getStructWriter(byte typeId) {
+ StructWriter structWriter = (StructWriter) writers[typeId];
+ if (structWriter == null) {
+ structWriter = nullableStructWriterFactory.build((StructVector) data.getVectorByType(typeId));
+ writers[typeId] = structWriter;
+ }
+ return structWriter;
+ }
+
+ public StructWriter asStruct(byte typeId) {
+ data.setTypeId(idx(), typeId);
+ return getStructWriter(typeId);
+ }
+
+ private ListWriter getListWriter(byte typeId) {
+ ListWriter listWriter = (ListWriter) writers[typeId];
+ if (listWriter == null) {
+ listWriter = new UnionListWriter((ListVector) data.getVectorByType(typeId), nullableStructWriterFactory);
+ writers[typeId] = listWriter;
+ }
+ return listWriter;
+ }
+
+ public ListWriter asList(byte typeId) {
+ data.setTypeId(idx(), typeId);
+ return getListWriter(typeId);
+ }
+
+ private MapWriter getMapWriter(byte typeId) {
+ MapWriter mapWriter = (MapWriter) writers[typeId];
+ if (mapWriter == null) {
+ mapWriter = new UnionMapWriter((MapVector) data.getVectorByType(typeId));
+ writers[typeId] = mapWriter;
+ }
+ return mapWriter;
+ }
+
+ public MapWriter asMap(byte typeId) {
+ data.setTypeId(idx(), typeId);
+ return getMapWriter(typeId);
+ }
+
+ BaseWriter getWriter(byte typeId) {
+ MinorType minorType = data.getVectorByType(typeId).getMinorType();
+ switch (minorType) {
+ case STRUCT:
+ return getStructWriter(typeId);
+ case LIST:
+ return getListWriter(typeId);
+ case MAP:
+ return getMapWriter(typeId);
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ return get${name}Writer(typeId);
+ </#if>
+ </#list>
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unknown type: " + minorType);
+ }
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+
+ private ${name}Writer get${name}Writer(byte typeId) {
+ ${name}Writer writer = (${name}Writer) writers[typeId];
+ if (writer == null) {
+ writer = new ${name}WriterImpl((${name}Vector) data.getVectorByType(typeId));
+ writers[typeId] = writer;
+ }
+ return writer;
+ }
+
+ public ${name}Writer as${name}(byte typeId) {
+ data.setTypeId(idx(), typeId);
+ return get${name}Writer(typeId);
+ }
+
+ @Override
+ public void write(${name}Holder holder) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>, byte typeId<#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ data.setTypeId(idx(), typeId);
+ get${name}Writer(typeId).setPosition(data.getOffset(idx()));
+ get${name}Writer(typeId).write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ public void writeNull() {
+ }
+
+ @Override
+ public StructWriter struct() {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getListWriter(typeId).setPosition(data.getOffset(idx()));
+ return getListWriter(typeId).struct();
+ }
+
+ @Override
+ public ListWriter list() {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getListWriter(typeId).setPosition(data.getOffset(idx()));
+ return getListWriter(typeId).list();
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).list(name);
+ }
+
+ @Override
+ public MapWriter map() {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getListWriter(typeId).setPosition(data.getOffset(idx()));
+ return getMapWriter(typeId).map();
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).map(name);
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).map(name, keysSorted);
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).struct(name);
+ }
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ @Override
+ public ${capName}Writer ${lowerName}(String name) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).${lowerName}(name);
+ }
+
+ @Override
+ public ${capName}Writer ${lowerName}() {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getListWriter(typeId).setPosition(data.getOffset(idx()));
+ return getListWriter(typeId).${lowerName}();
+ }
+ </#if>
+ <#if minor.class?starts_with("Decimal")>
+ public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}</#list>);
+ }
+ </#if>
+ </#list></#list>
+
+ @Override
+ public void allocate() {
+ data.allocateNew();
+ }
+
+ @Override
+ public void clear() {
+ data.clear();
+ }
+
+ @Override
+ public void close() throws Exception {
+ data.close();
+ }
+
+ @Override
+ public Field getField() {
+ return data.getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return data.getValueCapacity();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/HolderReaderImpl.java b/src/arrow/java/vector/src/main/codegen/templates/HolderReaderImpl.java
new file mode 100644
index 000000000..8394aaad4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/HolderReaderImpl.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<#list vv.types as type>
+<#list type.minor as minor>
+<#list ["", "Nullable"] as holderMode>
+<#assign nullMode = holderMode />
+
+<#assign lowerName = minor.class?uncap_first />
+<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+<#assign name = minor.class?cap_first />
+<#assign javaType = (minor.javaType!type.javaType) />
+<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+<#assign safeType=friendlyType />
+<#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+<#assign fields = (minor.fields!type.fields) + minor.typeParams![]/>
+
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${holderMode}${name}HolderReaderImpl.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+// Source code generated using FreeMarker template ${.template_name}
+
+@SuppressWarnings("unused")
+public class ${holderMode}${name}HolderReaderImpl extends AbstractFieldReader {
+
+ private ${nullMode}${name}Holder holder;
+ public ${holderMode}${name}HolderReaderImpl(${holderMode}${name}Holder holder) {
+ this.holder = holder;
+ }
+
+ @Override
+ public int size() {
+ throw new UnsupportedOperationException("You can't call size on a Holder value reader.");
+ }
+
+ @Override
+ public boolean next() {
+ throw new UnsupportedOperationException("You can't call next on a single value reader.");
+
+ }
+
+ @Override
+ public void setPosition(int index) {
+ throw new UnsupportedOperationException("You can't call next on a single value reader.");
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.${name?upper_case};
+ }
+
+ @Override
+ public boolean isSet() {
+ <#if holderMode == "Nullable">
+ return this.holder.isSet == 1;
+ <#else>
+ return true;
+ </#if>
+ }
+
+ @Override
+ public void read(${name}Holder h) {
+ <#list fields as field>
+ h.${field.name} = holder.${field.name};
+ </#list>
+ }
+
+ @Override
+ public void read(Nullable${name}Holder h) {
+ <#list fields as field>
+ h.${field.name} = holder.${field.name};
+ </#list>
+ h.isSet = isSet() ? 1 : 0;
+ }
+
+ // read friendly type
+ @Override
+ public ${friendlyType} read${safeType}() {
+ <#if nullMode == "Nullable">
+ if (!isSet()) {
+ return null;
+ }
+ </#if>
+
+ <#if type.major == "VarLen">
+ <#if type.width == 4>
+ int length = holder.end - holder.start;
+ <#elseif type.width == 8>
+ int length = (int) (holder.end - holder.start);
+ </#if>
+ byte[] value = new byte [length];
+ holder.buffer.getBytes(holder.start, value, 0, length);
+ <#if minor.class == "VarBinary" || minor.class == "LargeVarBinary">
+ return value;
+ <#elseif minor.class == "VarChar" || minor.class == "LargeVarChar">
+ Text text = new Text();
+ text.set(value);
+ return text;
+ </#if>
+ <#elseif minor.class == "IntervalDay">
+ return Duration.ofDays(holder.days).plusMillis(holder.milliseconds);
+ <#elseif minor.class == "IntervalYear">
+ return Period.ofMonths(holder.value);
+ <#elseif minor.class == "IntervalMonthDayNano">
+ return new PeriodDuration(Period.ofMonths(holder.months).plusDays(holder.days),
+ Duration.ofNanos(holder.nanoseconds));
+ <#elseif minor.class == "Duration">
+ return DurationVector.toDuration(holder.value, holder.unit);
+ <#elseif minor.class == "Bit" >
+ return new Boolean(holder.value != 0);
+ <#elseif minor.class == "Decimal">
+ byte[] bytes = new byte[${type.width}];
+ holder.buffer.getBytes(holder.start, bytes, 0, ${type.width});
+ ${friendlyType} value = new BigDecimal(new BigInteger(bytes), holder.scale);
+ return value;
+ <#elseif minor.class == "Decimal256">
+ byte[] bytes = new byte[${type.width}];
+ holder.buffer.getBytes(holder.start, bytes, 0, ${type.width});
+ ${friendlyType} value = new BigDecimal(new BigInteger(bytes), holder.scale);
+ return value;
+ <#elseif minor.class == "FixedSizeBinary">
+ byte[] value = new byte [holder.byteWidth];
+ holder.buffer.getBytes(0, value, 0, holder.byteWidth);
+ return value;
+ <#elseif minor.class == "TimeStampSec">
+ final long millis = java.util.concurrent.TimeUnit.SECONDS.toMillis(holder.value);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ <#elseif minor.class == "TimeStampMilli" || minor.class == "DateMilli" || minor.class == "TimeMilli">
+ return DateUtility.getLocalDateTimeFromEpochMilli(holder.value);
+ <#elseif minor.class == "TimeStampMicro">
+ return DateUtility.getLocalDateTimeFromEpochMicro(holder.value);
+ <#elseif minor.class == "TimeStampNano">
+ return DateUtility.getLocalDateTimeFromEpochNano(holder.value);
+ <#else>
+ ${friendlyType} value = new ${friendlyType}(this.holder.value);
+ return value;
+ </#if>
+ }
+
+ @Override
+ public Object readObject() {
+ return read${safeType}();
+ }
+
+ <#if nullMode != "Nullable">
+ public void copyAsValue(${minor.class?cap_first}Writer writer){
+ writer.write(holder);
+ }
+ </#if>
+}
+
+</#list>
+</#list>
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/NullReader.java b/src/arrow/java/vector/src/main/codegen/templates/NullReader.java
new file mode 100644
index 000000000..0c65f9a56
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/NullReader.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.vector.types.pojo.ArrowType.Null;
+import org.apache.arrow.vector.types.pojo.Field;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/NullReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public class NullReader extends AbstractBaseReader implements FieldReader{
+
+ public static final NullReader INSTANCE = new NullReader();
+ public static final NullReader EMPTY_LIST_INSTANCE = new NullReader(MinorType.NULL);
+ public static final NullReader EMPTY_STRUCT_INSTANCE = new NullReader(MinorType.STRUCT);
+ private MinorType type;
+
+ private NullReader(){
+ super();
+ type = MinorType.NULL;
+ }
+
+ private NullReader(MinorType type){
+ super();
+ this.type = type;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return type;
+ }
+
+ @Override
+ public Field getField() {
+ return new Field("", FieldType.nullable(new Null()), null);
+ }
+
+ public void copyAsValue(StructWriter writer) {}
+
+ public void copyAsValue(ListWriter writer) {}
+
+ public void copyAsValue(UnionWriter writer) {}
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ public void read(${name}Holder holder){
+ throw new UnsupportedOperationException("NullReader cannot write into non-nullable holder");
+ }
+
+ public void read(Nullable${name}Holder holder){
+ holder.isSet = 0;
+ }
+
+ public void read(int arrayIndex, ${name}Holder holder){
+ throw new ArrayIndexOutOfBoundsException();
+ }
+
+ public void copyAsValue(${minor.class}Writer writer){}
+ public void copyAsField(String name, ${minor.class}Writer writer){}
+
+ public void read(int arrayIndex, Nullable${name}Holder holder){
+ throw new ArrayIndexOutOfBoundsException();
+ }
+ </#list></#list>
+
+ public int size(){
+ return 0;
+ }
+
+ public boolean isSet(){
+ return false;
+ }
+
+ public boolean next(){
+ return false;
+ }
+
+ public RepeatedStructReader struct(){
+ return this;
+ }
+
+ public RepeatedListReader list(){
+ return this;
+ }
+
+ public StructReader struct(String name){
+ return this;
+ }
+
+ public ListReader list(String name){
+ return this;
+ }
+
+ public FieldReader reader(String name){
+ return this;
+ }
+
+ public FieldReader reader(){
+ return this;
+ }
+
+ private void fail(String name){
+ throw new IllegalArgumentException(String.format("You tried to read a %s type when you are using a ValueReader of type %s.", name, this.getClass().getSimpleName()));
+ }
+
+ <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean",
+ "LocalDateTime", "Duration", "Period", "Double", "Float",
+ "Character", "Text", "String", "Byte", "byte[]", "PeriodDuration"] as friendlyType>
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+
+ public ${friendlyType} read${safeType}(int arrayIndex){
+ return null;
+ }
+
+ public ${friendlyType} read${safeType}(){
+ return null;
+ }
+ </#list>
+
+}
+
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/StructWriters.java b/src/arrow/java/vector/src/main/codegen/templates/StructWriters.java
new file mode 100644
index 000000000..69693c630
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/StructWriters.java
@@ -0,0 +1,326 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<#list ["Nullable", "Single"] as mode>
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${mode}StructWriter.java" />
+<#assign index = "idx()">
+<#if mode == "Single">
+<#assign containerClass = "NonNullableStructVector" />
+<#else>
+<#assign containerClass = "StructVector" />
+</#if>
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+import java.util.Map;
+import java.util.HashMap;
+
+import org.apache.arrow.vector.holders.RepeatedStructHolder;
+import org.apache.arrow.vector.AllocationHelper;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+
+/*
+ * This class is generated using FreeMarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class ${mode}StructWriter extends AbstractFieldWriter {
+
+ protected final ${containerClass} container;
+ private int initialCapacity;
+ private final Map<String, FieldWriter> fields = new HashMap<>();
+ public ${mode}StructWriter(${containerClass} container) {
+ <#if mode == "Single">
+ if (container instanceof StructVector) {
+ throw new IllegalArgumentException("Invalid container: " + container);
+ }
+ </#if>
+ this.container = container;
+ this.initialCapacity = 0;
+ for (Field child : container.getField().getChildren()) {
+ MinorType minorType = Types.getMinorTypeForArrowType(child.getType());
+ switch (minorType) {
+ case STRUCT:
+ struct(child.getName());
+ break;
+ case LIST:
+ list(child.getName());
+ break;
+ case MAP: {
+ ArrowType.Map arrowType = (ArrowType.Map) child.getType();
+ map(child.getName(), arrowType.getKeysSorted());
+ break;
+ }
+ case UNION:
+ FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null);
+ UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory());
+ fields.put(handleCase(child.getName()), writer);
+ break;
+<#list vv.types as type><#list type.minor as minor>
+<#assign lowerName = minor.class?uncap_first />
+<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+<#assign upperName = minor.class?upper_case />
+ case ${upperName}: {
+ <#if minor.typeParams?? >
+ ${minor.arrowType} arrowType = (${minor.arrowType})child.getType();
+ ${lowerName}(child.getName()<#list minor.typeParams as typeParam>, arrowType.get${typeParam.name?cap_first}()</#list>);
+ <#else>
+ ${lowerName}(child.getName());
+ </#if>
+ break;
+ }
+</#list></#list>
+ default:
+ throw new UnsupportedOperationException("Unknown type: " + minorType);
+ }
+ }
+ }
+
+ protected String handleCase(final String input) {
+ return input.toLowerCase();
+ }
+
+ protected NullableStructWriterFactory getNullableStructWriterFactory() {
+ return NullableStructWriterFactory.getNullableStructWriterFactoryInstance();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return container.getValueCapacity();
+ }
+
+ public void setInitialCapacity(int initialCapacity) {
+ this.initialCapacity = initialCapacity;
+ container.setInitialCapacity(initialCapacity);
+ }
+
+ @Override
+ public boolean isEmptyStruct() {
+ return 0 == container.size();
+ }
+
+ @Override
+ public Field getField() {
+ return container.getField();
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ String finalName = handleCase(name);
+ FieldWriter writer = fields.get(finalName);
+ if(writer == null){
+ int vectorCount=container.size();
+ FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.STRUCT.getType(), null, null);
+ StructVector vector = container.addOrGet(name, fieldType, StructVector.class);
+ writer = new PromotableWriter(vector, container, getNullableStructWriterFactory());
+ if(vectorCount != container.size()) {
+ writer.allocate();
+ }
+ writer.setPosition(idx());
+ fields.put(finalName, writer);
+ } else {
+ if (writer instanceof PromotableWriter) {
+ // ensure writers are initialized
+ ((PromotableWriter)writer).getWriter(MinorType.STRUCT);
+ }
+ }
+ return writer;
+ }
+
+ @Override
+ public void close() throws Exception {
+ clear();
+ container.close();
+ }
+
+ @Override
+ public void allocate() {
+ container.allocateNew();
+ for(final FieldWriter w : fields.values()) {
+ w.allocate();
+ }
+ }
+
+ @Override
+ public void clear() {
+ container.clear();
+ for(final FieldWriter w : fields.values()) {
+ w.clear();
+ }
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ String finalName = handleCase(name);
+ FieldWriter writer = fields.get(finalName);
+ int vectorCount = container.size();
+ if(writer == null) {
+ FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.LIST.getType(), null, null);
+ writer = new PromotableWriter(container.addOrGet(name, fieldType, ListVector.class), container, getNullableStructWriterFactory());
+ if (container.size() > vectorCount) {
+ writer.allocate();
+ }
+ writer.setPosition(idx());
+ fields.put(finalName, writer);
+ } else {
+ if (writer instanceof PromotableWriter) {
+ // ensure writers are initialized
+ ((PromotableWriter)writer).getWriter(MinorType.LIST);
+ }
+ }
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ return map(name, false);
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ FieldWriter writer = fields.get(handleCase(name));
+ if(writer == null) {
+ ValueVector vector;
+ ValueVector currentVector = container.getChild(name);
+ MapVector v = container.addOrGet(name,
+ new FieldType(addVectorAsNullable,
+ new ArrowType.Map(keysSorted)
+ ,null, null),
+ MapVector.class);
+ writer = new PromotableWriter(v, container, getNullableStructWriterFactory());
+ vector = v;
+ if (currentVector == null || currentVector != vector) {
+ if(this.initialCapacity > 0) {
+ vector.setInitialCapacity(this.initialCapacity);
+ }
+ vector.allocateNewSafe();
+ }
+ writer.setPosition(idx());
+ fields.put(handleCase(name), writer);
+ } else {
+ if (writer instanceof PromotableWriter) {
+ // ensure writers are initialized
+ ((PromotableWriter)writer).getWriter(MinorType.MAP, new ArrowType.Map(keysSorted));
+ }
+ }
+ return writer;
+ }
+
+ public void setValueCount(int count) {
+ container.setValueCount(count);
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for(final FieldWriter w: fields.values()) {
+ w.setPosition(index);
+ }
+ }
+
+ <#if mode="Nullable">
+ @Override
+ public void writeNull() {
+ container.setNull(idx());
+ setValueCount(idx()+1);
+ super.setPosition(idx()+1);
+ }
+ </#if>
+
+ @Override
+ public void start() {
+ <#if mode == "Single">
+ <#else>
+ container.setIndexDefined(idx());
+ </#if>
+ }
+
+ @Override
+ public void end() {
+ setPosition(idx()+1);
+ }
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#assign vectName = capName />
+
+ <#if minor.typeParams?? >
+ @Override
+ public ${minor.class}Writer ${lowerName}(String name) {
+ // returns existing writer
+ final FieldWriter writer = fields.get(handleCase(name));
+ Preconditions.checkNotNull(writer);
+ return writer;
+ }
+
+ @Override
+ public ${minor.class}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ <#else>
+ @Override
+ public ${minor.class}Writer ${lowerName}(String name) {
+ </#if>
+ FieldWriter writer = fields.get(handleCase(name));
+ if(writer == null) {
+ ValueVector vector;
+ ValueVector currentVector = container.getChild(name);
+ ${vectName}Vector v = container.addOrGet(name,
+ new FieldType(addVectorAsNullable,
+ <#if minor.typeParams??>
+ <#if minor.arrowTypeConstructorParams??>
+ <#assign constructorParams = minor.arrowTypeConstructorParams />
+ <#else>
+ <#assign constructorParams = [] />
+ <#list minor.typeParams?reverse as typeParam>
+ <#assign constructorParams = constructorParams + [ typeParam.name ] />
+ </#list>
+ </#if>
+ new ${minor.arrowType}(${constructorParams?join(", ")}<#if minor.class?starts_with("Decimal")>, ${vectName}Vector.TYPE_WIDTH * 8</#if>)
+ <#else>
+ MinorType.${upperName}.getType()
+ </#if>
+ ,null, null),
+ ${vectName}Vector.class);
+ writer = new PromotableWriter(v, container, getNullableStructWriterFactory());
+ vector = v;
+ if (currentVector == null || currentVector != vector) {
+ if(this.initialCapacity > 0) {
+ vector.setInitialCapacity(this.initialCapacity);
+ }
+ vector.allocateNewSafe();
+ }
+ writer.setPosition(idx());
+ fields.put(handleCase(name), writer);
+ } else {
+ if (writer instanceof PromotableWriter) {
+ // ensure writers are initialized
+ ((PromotableWriter)writer).getWriter(MinorType.${upperName}<#if minor.class?starts_with("Decimal")>, new ${minor.arrowType}(precision, scale, ${vectName}Vector.TYPE_WIDTH * 8)</#if>);
+ }
+ }
+ return writer;
+ }
+
+ </#list></#list>
+
+}
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java b/src/arrow/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java
new file mode 100644
index 000000000..55c661bfc
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.complex.writer.Decimal256Writer;
+import org.apache.arrow.vector.complex.writer.DecimalWriter;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.DecimalHolder;
+
+
+import java.lang.UnsupportedOperationException;
+import java.math.BigDecimal;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionFixedSizeListWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+ package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+
+@SuppressWarnings("unused")
+public class UnionFixedSizeListWriter extends AbstractFieldWriter {
+
+ protected FixedSizeListVector vector;
+ protected PromotableWriter writer;
+ private boolean inStruct = false;
+ private String structName;
+ private final int listSize;
+
+ public UnionFixedSizeListWriter(FixedSizeListVector vector) {
+ this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ public UnionFixedSizeListWriter(FixedSizeListVector vector, NullableStructWriterFactory nullableStructWriterFactory) {
+ this.vector = vector;
+ this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory);
+ this.listSize = vector.getListSize();
+ }
+
+ public UnionFixedSizeListWriter(FixedSizeListVector vector, AbstractFieldWriter parent) {
+ this(vector);
+ }
+
+ @Override
+ public void allocate() {
+ vector.allocateNew();
+ }
+
+ @Override
+ public void clear() {
+ vector.clear();
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ public void setValueCount(int count) {
+ vector.setValueCount(count);
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return vector.getValueCapacity();
+ }
+
+ @Override
+ public void close() throws Exception {
+ vector.close();
+ writer.close();
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ }
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if uncappedName == "int" ><#assign uncappedName = "integer" /></#if>
+ <#if !minor.typeParams?? >
+
+ @Override
+ public ${name}Writer ${uncappedName}() {
+ return this;
+ }
+
+ @Override
+ public ${name}Writer ${uncappedName}(String name) {
+ structName = name;
+ return writer.${uncappedName}(name);
+ }
+ </#if>
+ </#list></#list>
+
+ @Override
+ public DecimalWriter decimal() {
+ return this;
+ }
+
+ @Override
+ public DecimalWriter decimal(String name, int scale, int precision) {
+ return writer.decimal(name, scale, precision);
+ }
+
+ @Override
+ public DecimalWriter decimal(String name) {
+ return writer.decimal(name);
+ }
+
+
+ @Override
+ public Decimal256Writer decimal256() {
+ return this;
+ }
+
+ @Override
+ public Decimal256Writer decimal256(String name, int scale, int precision) {
+ return writer.decimal256(name, scale, precision);
+ }
+
+ @Override
+ public Decimal256Writer decimal256(String name) {
+ return writer.decimal256(name);
+ }
+
+ @Override
+ public StructWriter struct() {
+ inStruct = true;
+ return this;
+ }
+
+ @Override
+ public ListWriter list() {
+ return writer;
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ ListWriter listWriter = writer.list(name);
+ return listWriter;
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ StructWriter structWriter = writer.struct(name);
+ return structWriter;
+ }
+
+ @Override
+ public MapWriter map() {
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ MapWriter mapWriter = writer.map(name);
+ return mapWriter;
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ writer.map(keysSorted);
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ MapWriter mapWriter = writer.map(name, keysSorted);
+ return mapWriter;
+ }
+
+ @Override
+ public void startList() {
+ int start = vector.startNewValue(idx());
+ writer.setPosition(start);
+ }
+
+ @Override
+ public void endList() {
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void start() {
+ writer.start();
+ }
+
+ @Override
+ public void end() {
+ writer.end();
+ inStruct = false;
+ }
+
+ @Override
+ public void write(DecimalHolder holder) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.write(holder);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ @Override
+ public void write(Decimal256Holder holder) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.write(holder);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+
+ @Override
+ public void writeNull() {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeNull();
+ }
+
+ public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeDecimal(start, buffer, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeDecimal(BigDecimal value) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeDecimal(value);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeBigEndianBytesToDecimal(value, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeDecimal256(start, buffer, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeDecimal256(BigDecimal value) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeDecimal256(value);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeBigEndianBytesToDecimal256(value, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? >
+ @Override
+ public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void write(${name}Holder holder) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, </#if></#list>);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ </#if>
+ </#list>
+ </#list>
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionListWriter.java b/src/arrow/java/vector/src/main/codegen/templates/UnionListWriter.java
new file mode 100644
index 000000000..926276b5e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionListWriter.java
@@ -0,0 +1,326 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.complex.writer.Decimal256Writer;
+import org.apache.arrow.vector.complex.writer.DecimalWriter;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.DecimalHolder;
+
+
+import java.lang.UnsupportedOperationException;
+import java.math.BigDecimal;
+
+<@pp.dropOutputFile />
+<#list ["List", "LargeList"] as listName>
+
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" />
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+
+@SuppressWarnings("unused")
+public class Union${listName}Writer extends AbstractFieldWriter {
+
+ protected ${listName}Vector vector;
+ protected PromotableWriter writer;
+ private boolean inStruct = false;
+ private boolean listStarted = false;
+ private String structName;
+ <#if listName == "LargeList">
+ private static final long OFFSET_WIDTH = 8;
+ <#else>
+ private static final int OFFSET_WIDTH = 4;
+ </#if>
+
+ public Union${listName}Writer(${listName}Vector vector) {
+ this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ public Union${listName}Writer(${listName}Vector vector, NullableStructWriterFactory nullableStructWriterFactory) {
+ this.vector = vector;
+ this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory);
+ }
+
+ public Union${listName}Writer(${listName}Vector vector, AbstractFieldWriter parent) {
+ this(vector);
+ }
+
+ @Override
+ public void allocate() {
+ vector.allocateNew();
+ }
+
+ @Override
+ public void clear() {
+ vector.clear();
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ public void setValueCount(int count) {
+ vector.setValueCount(count);
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return vector.getValueCapacity();
+ }
+
+ @Override
+ public void close() throws Exception {
+ vector.close();
+ writer.close();
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if uncappedName == "int" ><#assign uncappedName = "integer" /></#if>
+ <#if !minor.typeParams?? >
+
+ @Override
+ public ${name}Writer ${uncappedName}() {
+ return this;
+ }
+
+ @Override
+ public ${name}Writer ${uncappedName}(String name) {
+ structName = name;
+ return writer.${uncappedName}(name);
+ }
+ </#if>
+ </#list></#list>
+
+ @Override
+ public DecimalWriter decimal() {
+ return this;
+ }
+
+ @Override
+ public DecimalWriter decimal(String name, int scale, int precision) {
+ return writer.decimal(name, scale, precision);
+ }
+
+ @Override
+ public DecimalWriter decimal(String name) {
+ return writer.decimal(name);
+ }
+
+ @Override
+ public Decimal256Writer decimal256() {
+ return this;
+ }
+
+ @Override
+ public Decimal256Writer decimal256(String name, int scale, int precision) {
+ return writer.decimal256(name, scale, precision);
+ }
+
+ @Override
+ public Decimal256Writer decimal256(String name) {
+ return writer.decimal256(name);
+ }
+
+
+ @Override
+ public StructWriter struct() {
+ inStruct = true;
+ return this;
+ }
+
+ @Override
+ public ListWriter list() {
+ return writer;
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ ListWriter listWriter = writer.list(name);
+ return listWriter;
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ StructWriter structWriter = writer.struct(name);
+ return structWriter;
+ }
+
+ @Override
+ public MapWriter map() {
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ MapWriter mapWriter = writer.map(name);
+ return mapWriter;
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ writer.map(keysSorted);
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ MapWriter mapWriter = writer.map(name, keysSorted);
+ return mapWriter;
+ }
+
+ <#if listName == "LargeList">
+ @Override
+ public void startList() {
+ vector.startNewValue(idx());
+ writer.setPosition(checkedCastToInt(vector.getOffsetBuffer().getLong((idx() + 1L) * OFFSET_WIDTH)));
+ listStarted = true;
+ }
+
+ @Override
+ public void endList() {
+ vector.getOffsetBuffer().setLong((idx() + 1L) * OFFSET_WIDTH, writer.idx());
+ setPosition(idx() + 1);
+ listStarted = false;
+ }
+ <#else>
+ @Override
+ public void startList() {
+ vector.startNewValue(idx());
+ writer.setPosition(vector.getOffsetBuffer().getInt((idx() + 1L) * OFFSET_WIDTH));
+ listStarted = true;
+ }
+
+ @Override
+ public void endList() {
+ vector.getOffsetBuffer().setInt((idx() + 1L) * OFFSET_WIDTH, writer.idx());
+ setPosition(idx() + 1);
+ listStarted = false;
+ }
+ </#if>
+
+ @Override
+ public void start() {
+ writer.start();
+ }
+
+ @Override
+ public void end() {
+ writer.end();
+ inStruct = false;
+ }
+
+ @Override
+ public void write(DecimalHolder holder) {
+ writer.write(holder);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ @Override
+ public void write(Decimal256Holder holder) {
+ writer.write(holder);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ @Override
+ public void writeNull() {
+ if (!listStarted){
+ vector.setNull(idx());
+ } else {
+ writer.writeNull();
+ }
+ }
+
+ public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) {
+ writer.writeDecimal(start, buffer, arrowType);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeDecimal(long start, ArrowBuf buffer) {
+ writer.writeDecimal(start, buffer);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeDecimal(BigDecimal value) {
+ writer.writeDecimal(value);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType){
+ writer.writeBigEndianBytesToDecimal(value, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) {
+ writer.writeDecimal256(start, buffer, arrowType);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer) {
+ writer.writeDecimal256(start, buffer);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeDecimal256(BigDecimal value) {
+ writer.writeDecimal256(value);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType){
+ writer.writeBigEndianBytesToDecimal256(value, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? >
+ @Override
+ public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void write(${name}Holder holder) {
+ writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, </#if></#list>);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ </#if>
+ </#list>
+ </#list>
+}
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionMapWriter.java b/src/arrow/java/vector/src/main/codegen/templates/UnionMapWriter.java
new file mode 100644
index 000000000..606f88037
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionMapWriter.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.complex.writer.Decimal256Writer;
+import org.apache.arrow.vector.complex.writer.DecimalWriter;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.DecimalHolder;
+
+import java.lang.UnsupportedOperationException;
+import java.math.BigDecimal;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionMapWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+
+/**
+ * <p>Writer for MapVectors. This extends UnionListWriter to simplify writing map entries to a list
+ * of struct elements, with "key" and "value" fields. The procedure for writing a map begin with
+ * {@link #startMap()} followed by {@link #startEntry()}. An entry is written by using the
+ * {@link #key()} writer to write the key, then the {@link #value()} writer to write a value. After
+ * writing the value, call {@link #endEntry()} to complete the entry. Each map can have 1 or more
+ * entries. When done writing entries, call {@link #endMap()} to complete the map.
+ *
+ * <p>NOTE: the MapVector can have NULL values by not writing to position. If a map is started with
+ * {@link #startMap()}, then it must have a key written. The value of a map entry can be NULL by
+ * not using the {@link #value()} writer.
+ *
+ * <p>Example to write the following map to position 5 of a vector
+ * <pre>{@code
+ * // {
+ * // 1 -> 3,
+ * // 2 -> 4,
+ * // 3 -> NULL
+ * // }
+ *
+ * UnionMapWriter writer = ...
+ *
+ * writer.setPosition(5);
+ * writer.startMap();
+ * writer.startEntry();
+ * writer.key().integer().writeInt(1);
+ * writer.value().integer().writeInt(3);
+ * writer.endEntry();
+ * writer.startEntry();
+ * writer.key().integer().writeInt(2);
+ * writer.value().integer().writeInt(4);
+ * writer.endEntry();
+ * writer.startEntry();
+ * writer.key().integer().writeInt(3);
+ * writer.endEntry();
+ * writer.endMap();
+ * </pre>
+ * </p>
+ */
+@SuppressWarnings("unused")
+public class UnionMapWriter extends UnionListWriter {
+
+ /**
+ * Current mode for writing map entries, set by calling {@link #key()} or {@link #value()}
+ * and reset with a call to {@link #endEntry()}. With KEY mode, a struct writer with field
+ * named "key" is returned. With VALUE mode, a struct writer with field named "value" is
+ * returned. In OFF mode, the writer will behave like a standard UnionListWriter
+ */
+ private enum MapWriteMode {
+ OFF,
+ KEY,
+ VALUE,
+ }
+
+ private MapWriteMode mode = MapWriteMode.OFF;
+ private StructWriter entryWriter;
+
+ public UnionMapWriter(MapVector vector) {
+ super(vector);
+ entryWriter = struct();
+ }
+
+ /** Start writing a map that consists of 1 or more entries. */
+ public void startMap() {
+ startList();
+ }
+
+ /** Complete the map. */
+ public void endMap() {
+ endList();
+ }
+
+ /**
+ * Start a map entry that should be followed by calls to {@link #key()} and {@link #value()}
+ * writers. Call {@link #endEntry()} to complete the entry.
+ */
+ public void startEntry() {
+ writer.setAddVectorAsNullable(false);
+ entryWriter.start();
+ }
+
+ /** Complete the map entry. */
+ public void endEntry() {
+ entryWriter.end();
+ mode = MapWriteMode.OFF;
+ writer.setAddVectorAsNullable(true);
+ }
+
+ /** Return the key writer that is used to write to the "key" field. */
+ public UnionMapWriter key() {
+ writer.setAddVectorAsNullable(false);
+ mode = MapWriteMode.KEY;
+ return this;
+ }
+
+ /** Return the value writer that is used to write to the "value" field. */
+ public UnionMapWriter value() {
+ writer.setAddVectorAsNullable(true);
+ mode = MapWriteMode.VALUE;
+ return this;
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if uncappedName == "int" ><#assign uncappedName = "integer" /></#if>
+ <#if !minor.typeParams?? >
+ @Override
+ public ${name}Writer ${uncappedName}() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.${uncappedName}(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.${uncappedName}(MapVector.VALUE_NAME);
+ default:
+ return this;
+ }
+ }
+
+ </#if>
+ </#list></#list>
+ @Override
+ public DecimalWriter decimal() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.decimal(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.decimal(MapVector.VALUE_NAME);
+ default:
+ return this;
+ }
+ }
+
+ @Override
+ public Decimal256Writer decimal256() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.decimal256(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.decimal256(MapVector.VALUE_NAME);
+ default:
+ return this;
+ }
+ }
+
+
+ @Override
+ public StructWriter struct() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.struct(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.struct(MapVector.VALUE_NAME);
+ default:
+ return super.struct();
+ }
+ }
+
+ @Override
+ public ListWriter list() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.list(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.list(MapVector.VALUE_NAME);
+ default:
+ return super.list();
+ }
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ switch (mode) {
+ case KEY:
+ return entryWriter.map(MapVector.KEY_NAME, keysSorted);
+ case VALUE:
+ return entryWriter.map(MapVector.VALUE_NAME, keysSorted);
+ default:
+ return super.map();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionReader.java b/src/arrow/java/vector/src/main/codegen/templates/UnionReader.java
new file mode 100644
index 000000000..444ca9ca7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionReader.java
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public class UnionReader extends AbstractFieldReader {
+
+ private BaseReader[] readers = new BaseReader[45];
+ public UnionVector data;
+
+ public UnionReader(UnionVector data) {
+ this.data = data;
+ }
+
+ public MinorType getMinorType() {
+ return TYPES[data.getTypeValue(idx())];
+ }
+
+ private static MinorType[] TYPES = new MinorType[45];
+
+ static {
+ for (MinorType minorType : MinorType.values()) {
+ TYPES[minorType.ordinal()] = minorType;
+ }
+ }
+
+ @Override
+ public Field getField() {
+ return data.getField();
+ }
+
+ public boolean isSet(){
+ return !data.isNull(idx());
+ }
+
+ public void read(UnionHolder holder) {
+ holder.reader = this;
+ holder.isSet = this.isSet() ? 1 : 0;
+ }
+
+ public void read(int index, UnionHolder holder) {
+ getList().read(index, holder);
+ }
+
+ private FieldReader getReaderForIndex(int index) {
+ int typeValue = data.getTypeValue(index);
+ FieldReader reader = (FieldReader) readers[typeValue];
+ if (reader != null) {
+ return reader;
+ }
+ switch (MinorType.values()[typeValue]) {
+ case NULL:
+ return NullReader.INSTANCE;
+ case STRUCT:
+ return (FieldReader) getStruct();
+ case LIST:
+ return (FieldReader) getList();
+ case MAP:
+ return (FieldReader) getMap();
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ return (FieldReader) get${name}();
+ </#if>
+ </#list>
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unsupported type: " + MinorType.values()[typeValue]);
+ }
+ }
+
+ private SingleStructReaderImpl structReader;
+
+ private StructReader getStruct() {
+ if (structReader == null) {
+ structReader = (SingleStructReaderImpl) data.getStruct().getReader();
+ structReader.setPosition(idx());
+ readers[MinorType.STRUCT.ordinal()] = structReader;
+ }
+ return structReader;
+ }
+
+ private UnionListReader listReader;
+
+ private FieldReader getList() {
+ if (listReader == null) {
+ listReader = new UnionListReader(data.getList());
+ listReader.setPosition(idx());
+ readers[MinorType.LIST.ordinal()] = listReader;
+ }
+ return listReader;
+ }
+
+ private UnionMapReader mapReader;
+
+ private FieldReader getMap() {
+ if (mapReader == null) {
+ mapReader = new UnionMapReader(data.getMap());
+ mapReader.setPosition(idx());
+ readers[MinorType.MAP.ordinal()] = mapReader;
+ }
+ return mapReader;
+ }
+
+ @Override
+ public java.util.Iterator<String> iterator() {
+ return getStruct().iterator();
+ }
+
+ @Override
+ public void copyAsValue(UnionWriter writer) {
+ writer.data.copyFrom(idx(), writer.idx(), data);
+ }
+
+ <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean",
+ "LocalDateTime", "Duration", "Period", "Double", "Float",
+ "Character", "Text", "Byte", "byte[]", "PeriodDuration"] as friendlyType>
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+
+ @Override
+ public ${friendlyType} read${safeType}() {
+ return getReaderForIndex(idx()).read${safeType}();
+ }
+
+ </#list>
+
+ public int size() {
+ return getReaderForIndex(idx()).size();
+ }
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign boxedType = (minor.boxedType!type.boxedType) />
+ <#assign javaType = (minor.javaType!type.javaType) />
+ <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+
+ private ${name}ReaderImpl ${uncappedName}Reader;
+
+ private ${name}ReaderImpl get${name}() {
+ if (${uncappedName}Reader == null) {
+ ${uncappedName}Reader = new ${name}ReaderImpl(data.get${name}Vector());
+ ${uncappedName}Reader.setPosition(idx());
+ readers[MinorType.${name?upper_case}.ordinal()] = ${uncappedName}Reader;
+ }
+ return ${uncappedName}Reader;
+ }
+
+ public void read(Nullable${name}Holder holder){
+ getReaderForIndex(idx()).read(holder);
+ }
+
+ public void copyAsValue(${name}Writer writer){
+ getReaderForIndex(idx()).copyAsValue(writer);
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ @Override
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for (BaseReader reader : readers) {
+ if (reader != null) {
+ reader.setPosition(index);
+ }
+ }
+ }
+
+ public FieldReader reader(String name){
+ return getStruct().reader(name);
+ }
+
+ public FieldReader reader() {
+ return getList().reader();
+ }
+
+ public boolean next() {
+ return getReaderForIndex(idx()).next();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionVector.java b/src/arrow/java/vector/src/main/codegen/templates/UnionVector.java
new file mode 100644
index 000000000..1468116c7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionVector.java
@@ -0,0 +1,854 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.AbstractStructVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/UnionVector.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex;
+
+<#include "/@includes/vv_imports.ftl" />
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.util.Preconditions;
+
+import static org.apache.arrow.vector.types.UnionMode.Sparse;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+
+
+/**
+ * A vector which can hold values of different types. It does so by using a StructVector which contains a vector for each
+ * primitive type that is stored. StructVector is used in order to take advantage of its serialization/deserialization methods,
+ * as well as the addOrGet method.
+ *
+ * For performance reasons, UnionVector stores a cached reference to each subtype vector, to avoid having to do the struct lookup
+ * each time the vector is accessed.
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+public class UnionVector extends AbstractContainerVector implements FieldVector {
+ int valueCount;
+
+ NonNullableStructVector internalStruct;
+ protected ArrowBuf typeBuffer;
+
+ private StructVector structVector;
+ private ListVector listVector;
+ private MapVector mapVector;
+
+ private FieldReader reader;
+
+ private int singleType = 0;
+ private ValueVector singleVector;
+
+ private int typeBufferAllocationSizeInBytes;
+
+ private final FieldType fieldType;
+ private final Field[] typeIds = new Field[Byte.MAX_VALUE + 1];
+
+ public static final byte TYPE_WIDTH = 1;
+ private static final FieldType INTERNAL_STRUCT_TYPE = new FieldType(false /*nullable*/,
+ ArrowType.Struct.INSTANCE, null /*dictionary*/, null /*metadata*/);
+
+ public static UnionVector empty(String name, BufferAllocator allocator) {
+ FieldType fieldType = FieldType.nullable(new ArrowType.Union(
+ UnionMode.Sparse, null));
+ return new UnionVector(name, allocator, fieldType, null);
+ }
+
+ public UnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(name, allocator, callBack);
+ this.fieldType = fieldType;
+ this.internalStruct = new NonNullableStructVector(
+ "internal",
+ allocator,
+ INTERNAL_STRUCT_TYPE,
+ callBack,
+ AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE,
+ false);
+ this.typeBuffer = allocator.getEmpty();
+ this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH;
+ }
+
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UNION;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ int count = 0;
+ for (Field child: children) {
+ int typeId = Types.getMinorTypeForArrowType(child.getType()).ordinal();
+ if (fieldType != null) {
+ int[] typeIds = ((ArrowType.Union)fieldType.getType()).getTypeIds();
+ if (typeIds != null) {
+ typeId = typeIds[count++];
+ }
+ }
+ typeIds[typeId] = child;
+ }
+ internalStruct.initializeChildrenFromFields(children);
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return internalStruct.getChildrenFromFields();
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 1) {
+ throw new IllegalArgumentException("Illegal buffer count, expected 1, got: " + ownBuffers.size());
+ }
+ ArrowBuf buffer = ownBuffers.get(0);
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = buffer.getReferenceManager().retain(buffer, allocator);
+ typeBufferAllocationSizeInBytes = checkedCastToInt(typeBuffer.capacity());
+ this.valueCount = fieldNode.getLength();
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(1);
+ setReaderAndWriterIndex();
+ result.add(typeBuffer);
+
+ return result;
+ }
+
+ private void setReaderAndWriterIndex() {
+ typeBuffer.readerIndex(0);
+ typeBuffer.writerIndex(valueCount * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use geFieldBuffers");
+ }
+
+ private String fieldName(MinorType type) {
+ return type.name().toLowerCase();
+ }
+
+ private FieldType fieldType(MinorType type) {
+ return FieldType.nullable(type.getType());
+ }
+
+ private <T extends FieldVector> T addOrGet(Types.MinorType minorType, Class<T> c) {
+ return addOrGet(null, minorType, c);
+ }
+
+ private <T extends FieldVector> T addOrGet(String name, Types.MinorType minorType, ArrowType arrowType, Class<T> c) {
+ return internalStruct.addOrGet(name == null ? fieldName(minorType) : name, FieldType.nullable(arrowType), c);
+ }
+
+ private <T extends FieldVector> T addOrGet(String name, Types.MinorType minorType, Class<T> c) {
+ return internalStruct.addOrGet(name == null ? fieldName(minorType) : name, fieldType(minorType), c);
+ }
+
+
+ @Override
+ public long getValidityBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ public long getTypeBufferAddress() {
+ return typeBuffer.memoryAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ public ArrowBuf getTypeBuffer() {
+ return typeBuffer;
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() { throw new UnsupportedOperationException(); }
+
+ @Override
+ public ArrowBuf getDataBuffer() { throw new UnsupportedOperationException(); }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() { throw new UnsupportedOperationException(); }
+
+ public StructVector getStruct() {
+ if (structVector == null) {
+ int vectorCount = internalStruct.size();
+ structVector = addOrGet(MinorType.STRUCT, StructVector.class);
+ if (internalStruct.size() > vectorCount) {
+ structVector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return structVector;
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign lowerCaseName = name?lower_case/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+
+ private ${name}Vector ${uncappedName}Vector;
+
+ public ${name}Vector get${name}Vector(<#if minor.class?starts_with("Decimal")> ArrowType arrowType</#if>) {
+ return get${name}Vector(null<#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ }
+
+ public ${name}Vector get${name}Vector(String name<#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ if (${uncappedName}Vector == null) {
+ int vectorCount = internalStruct.size();
+ ${uncappedName}Vector = addOrGet(name, MinorType.${name?upper_case},<#if minor.class?starts_with("Decimal")> arrowType,</#if> ${name}Vector.class);
+ if (internalStruct.size() > vectorCount) {
+ ${uncappedName}Vector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return ${uncappedName}Vector;
+ }
+ <#if minor.class?starts_with("Decimal")>
+ public ${name}Vector get${name}Vector() {
+ if (${uncappedName}Vector == null) {
+ throw new IllegalArgumentException("No ${uncappedName} present. Provide ArrowType argument to create a new vector");
+ }
+ return ${uncappedName}Vector;
+ }
+ </#if>
+ </#if>
+ </#list>
+ </#list>
+
+ public ListVector getList() {
+ if (listVector == null) {
+ int vectorCount = internalStruct.size();
+ listVector = addOrGet(MinorType.LIST, ListVector.class);
+ if (internalStruct.size() > vectorCount) {
+ listVector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return listVector;
+ }
+
+ public MapVector getMap() {
+ if (mapVector == null) {
+ throw new IllegalArgumentException("No map present. Provide ArrowType argument to create a new vector");
+ }
+ return mapVector;
+ }
+
+ public MapVector getMap(ArrowType arrowType) {
+ return getMap(null, arrowType);
+ }
+
+ public MapVector getMap(String name, ArrowType arrowType) {
+ if (mapVector == null) {
+ int vectorCount = internalStruct.size();
+ mapVector = addOrGet(name, MinorType.MAP, arrowType, MapVector.class);
+ if (internalStruct.size() > vectorCount) {
+ mapVector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return mapVector;
+ }
+
+ public int getTypeValue(int index) {
+ return typeBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ /* new allocation -- clear the current buffers */
+ clear();
+ internalStruct.allocateNew();
+ try {
+ allocateTypeBuffer();
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* new allocation -- clear the current buffers */
+ clear();
+ boolean safe = internalStruct.allocateNewSafe();
+ if (!safe) { return false; }
+ try {
+ allocateTypeBuffer();
+ } catch (Exception e) {
+ clear();
+ return false;
+ }
+
+ return true;
+ }
+
+ private void allocateTypeBuffer() {
+ typeBuffer = allocator.buffer(typeBufferAllocationSizeInBytes);
+ typeBuffer.readerIndex(0);
+ typeBuffer.setZero(0, typeBuffer.capacity());
+ }
+
+ @Override
+ public void reAlloc() {
+ internalStruct.reAlloc();
+ reallocTypeBuffer();
+ }
+
+ private void reallocTypeBuffer() {
+ final long currentBufferCapacity = typeBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (typeBufferAllocationSizeInBytes > 0) {
+ newAllocationSize = typeBufferAllocationSizeInBytes;
+ } else {
+ newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer(checkedCastToInt(newAllocationSize));
+ newBuf.setBytes(0, typeBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ typeBuffer.getReferenceManager().release(1);
+ typeBuffer = newBuf;
+ typeBufferAllocationSizeInBytes = (int)newAllocationSize;
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) { }
+
+ @Override
+ public int getValueCapacity() {
+ return Math.min(getTypeBufferValueCapacity(), internalStruct.getValueCapacity());
+ }
+
+ @Override
+ public void close() {
+ clear();
+ }
+
+ @Override
+ public void clear() {
+ valueCount = 0;
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = allocator.getEmpty();
+ internalStruct.clear();
+ }
+
+ @Override
+ public void reset() {
+ valueCount = 0;
+ typeBuffer.setZero(0, typeBuffer.capacity());
+ internalStruct.reset();
+ }
+
+ @Override
+ public Field getField() {
+ List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>();
+ List<FieldVector> children = internalStruct.getChildren();
+ int[] typeIds = new int[children.size()];
+ for (ValueVector v : children) {
+ typeIds[childFields.size()] = v.getMinorType().ordinal();
+ childFields.add(v.getField());
+ }
+
+ FieldType fieldType;
+ if (this.fieldType == null) {
+ fieldType = FieldType.nullable(new ArrowType.Union(Sparse, typeIds));
+ } else {
+ final UnionMode mode = ((ArrowType.Union)this.fieldType.getType()).getMode();
+ fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds),
+ this.fieldType.getDictionary(), this.fieldType.getMetadata());
+ }
+
+ return new Field(name, fieldType, childFields);
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(name, allocator);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new org.apache.arrow.vector.complex.UnionVector.TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((UnionVector) target);
+ }
+
+ @Override
+ public void copyFrom(int inIndex, int outIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ UnionVector fromCast = (UnionVector) from;
+ fromCast.getReader().setPosition(inIndex);
+ getWriter().setPosition(outIndex);
+ ComplexCopier.copy(fromCast.reader, writer);
+ }
+
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ public FieldVector addVector(FieldVector v) {
+ final String name = v.getName().isEmpty() ? fieldName(v.getMinorType()) : v.getName();
+ Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name));
+ final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass());
+ v.makeTransferPair(newVector).transfer();
+ internalStruct.putChild(name, newVector);
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return newVector;
+ }
+
+ /**
+ * Directly put a vector to internalStruct without creating a new one with same type.
+ */
+ public void directAddVector(FieldVector v) {
+ String name = fieldName(v.getMinorType());
+ Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name));
+ internalStruct.putChild(name, v);
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+
+ private class TransferImpl implements TransferPair {
+ private final TransferPair internalStructVectorTransferPair;
+ private final UnionVector to;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ to = new UnionVector(name, allocator, /* field type */ null, callBack);
+ internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct);
+ }
+
+ public TransferImpl(UnionVector to) {
+ this.to = to;
+ internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct);
+ }
+
+ @Override
+ public void transfer() {
+ to.clear();
+ ReferenceManager refManager = typeBuffer.getReferenceManager();
+ to.typeBuffer = refManager.transferOwnership(typeBuffer, to.allocator).getTransferredBuffer();
+ internalStructVectorTransferPair.transfer();
+ to.valueCount = valueCount;
+ clear();
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ to.clear();
+
+ internalStructVectorTransferPair.splitAndTransfer(startIndex, length);
+ final int startPoint = startIndex * TYPE_WIDTH;
+ final int sliceLength = length * TYPE_WIDTH;
+ final ArrowBuf slicedBuffer = typeBuffer.slice(startPoint, sliceLength);
+ final ReferenceManager refManager = slicedBuffer.getReferenceManager();
+ to.typeBuffer = refManager.transferOwnership(slicedBuffer, to.allocator).getTransferredBuffer();
+ to.setValueCount(length);
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ this.to.copyFrom(from, to, UnionVector.this);
+ }
+ }
+
+ @Override
+ public FieldReader getReader() {
+ if (reader == null) {
+ reader = new UnionReader(this);
+ }
+ return reader;
+ }
+
+ public FieldWriter getWriter() {
+ if (writer == null) {
+ writer = new UnionWriter(this);
+ }
+ return writer;
+ }
+
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) { return 0; }
+
+ return (valueCount * TYPE_WIDTH) + internalStruct.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ long bufferSize = 0;
+ for (final ValueVector v : (Iterable<ValueVector>) this) {
+ bufferSize += v.getBufferSizeFor(valueCount);
+ }
+
+ return (int) bufferSize + (valueCount * TYPE_WIDTH);
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ List<ArrowBuf> list = new java.util.ArrayList<>();
+ setReaderAndWriterIndex();
+ if (getBufferSize() != 0) {
+ list.add(typeBuffer);
+ list.addAll(java.util.Arrays.asList(internalStruct.getBuffers(clear)));
+ }
+ if (clear) {
+ valueCount = 0;
+ typeBuffer.getReferenceManager().retain();
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = allocator.getEmpty();
+ }
+ return list.toArray(new ArrowBuf[list.size()]);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return internalStruct.iterator();
+ }
+
+ public ValueVector getVector(int index) {
+ return getVector(index, null);
+ }
+
+ public ValueVector getVector(int index, ArrowType arrowType) {
+ int type = typeBuffer.getByte(index * TYPE_WIDTH);
+ return getVectorByType(type, arrowType);
+ }
+
+ public ValueVector getVectorByType(int typeId) {
+ return getVectorByType(typeId, null);
+ }
+
+ public ValueVector getVectorByType(int typeId, ArrowType arrowType) {
+ Field type = typeIds[typeId];
+ Types.MinorType minorType;
+ String name = null;
+ if (type == null) {
+ minorType = Types.MinorType.values()[typeId];
+ } else {
+ minorType = Types.getMinorTypeForArrowType(type.getType());
+ name = type.getName();
+ }
+ switch (minorType) {
+ case NULL:
+ return null;
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ case ${name?upper_case}:
+ return get${name}Vector(name<#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ </#if>
+ </#list>
+ </#list>
+ case STRUCT:
+ return getStruct();
+ case LIST:
+ return getList();
+ case MAP:
+ return getMap(name, arrowType);
+ default:
+ throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[typeId]);
+ }
+ }
+
+ public Object getObject(int index) {
+ ValueVector vector = getVector(index);
+ if (vector != null) {
+ return vector.isNull(index) ? null : vector.getObject(index);
+ }
+ return null;
+ }
+
+ public byte[] get(int index) {
+ return null;
+ }
+
+ public void get(int index, ComplexHolder holder) {
+ }
+
+ public void get(int index, UnionHolder holder) {
+ FieldReader reader = new UnionReader(UnionVector.this);
+ reader.setPosition(index);
+ holder.reader = reader;
+ }
+
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * IMPORTANT: Union types always return non null as there is no validity buffer.
+ *
+ * To check validity correctly you must check the underlying vector.
+ */
+ public boolean isNull(int index) {
+ return false;
+ }
+
+ @Override
+ public int getNullCount() {
+ return 0;
+ }
+
+ public int isSet(int index) {
+ return isNull(index) ? 0 : 1;
+ }
+
+ UnionWriter writer;
+
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ while (valueCount > getTypeBufferValueCapacity()) {
+ reallocTypeBuffer();
+ }
+ internalStruct.setValueCount(valueCount);
+ }
+
+ public void setSafe(int index, UnionHolder holder) {
+ setSafe(index, holder, null);
+ }
+
+ public void setSafe(int index, UnionHolder holder, ArrowType arrowType) {
+ FieldReader reader = holder.reader;
+ if (writer == null) {
+ writer = new UnionWriter(UnionVector.this);
+ }
+ writer.setPosition(index);
+ MinorType type = reader.getMinorType();
+ switch (type) {
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ case ${name?upper_case}:
+ Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder();
+ reader.read(${uncappedName}Holder);
+ setSafe(index, ${uncappedName}Holder<#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ break;
+ </#if>
+ </#list>
+ </#list>
+ case STRUCT: {
+ ComplexCopier.copy(reader, writer);
+ break;
+ }
+ case LIST: {
+ ComplexCopier.copy(reader, writer);
+ break;
+ }
+ default:
+ throw new UnsupportedOperationException();
+ }
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ public void setSafe(int index, Nullable${name}Holder holder<#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ setType(index, MinorType.${name?upper_case});
+ get${name}Vector(null<#if minor.class?starts_with("Decimal")>, arrowType</#if>).setSafe(index, holder);
+ }
+
+ </#if>
+ </#list>
+ </#list>
+
+ public void setType(int index, MinorType type) {
+ while (index >= getTypeBufferValueCapacity()) {
+ reallocTypeBuffer();
+ }
+ typeBuffer.setByte(index * TYPE_WIDTH , (byte) type.ordinal());
+ }
+
+ private int getTypeBufferValueCapacity() {
+ return capAtMaxInt(typeBuffer.capacity() / TYPE_WIDTH);
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ ValueVector vec = getVector(index);
+ if (vec == null) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ return vec.hashCode(index, hasher);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount());
+ }
+
+ @Override
+ public <T extends FieldVector> T addOrGet(String name, FieldType fieldType, Class<T> clazz) {
+ return internalStruct.addOrGet(name, fieldType, clazz);
+ }
+
+ @Override
+ public <T extends FieldVector> T getChild(String name, Class<T> clazz) {
+ return internalStruct.getChild(name, clazz);
+ }
+
+ @Override
+ public VectorWithOrdinal getChildVectorWithOrdinal(String name) {
+ return internalStruct.getChildVectorWithOrdinal(name);
+ }
+
+ @Override
+ public int size() {
+ return internalStruct.size();
+ }
+
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ for (final ValueVector vector : internalStruct) {
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(valueCount, density);
+ } else {
+ vector.setInitialCapacity(valueCount);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionWriter.java b/src/arrow/java/vector/src/main/codegen/templates/UnionWriter.java
new file mode 100644
index 000000000..fc4fd7dd7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionWriter.java
@@ -0,0 +1,364 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory;
+import org.apache.arrow.vector.types.Types;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+import org.apache.arrow.vector.complex.writer.BaseWriter;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class UnionWriter extends AbstractFieldWriter implements FieldWriter {
+
+ UnionVector data;
+ private StructWriter structWriter;
+ private UnionListWriter listWriter;
+ private UnionMapWriter mapWriter;
+ private List<BaseWriter> writers = new java.util.ArrayList<>();
+ private final NullableStructWriterFactory nullableStructWriterFactory;
+
+ public UnionWriter(UnionVector vector) {
+ this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ public UnionWriter(UnionVector vector, NullableStructWriterFactory nullableStructWriterFactory) {
+ data = vector;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for (BaseWriter writer : writers) {
+ writer.setPosition(index);
+ }
+ }
+
+
+ @Override
+ public void start() {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().start();
+ }
+
+ @Override
+ public void end() {
+ getStructWriter().end();
+ }
+
+ @Override
+ public void startList() {
+ getListWriter().startList();
+ data.setType(idx(), MinorType.LIST);
+ }
+
+ @Override
+ public void endList() {
+ getListWriter().endList();
+ }
+
+ @Override
+ public void startMap() {
+ getMapWriter().startMap();
+ data.setType(idx(), MinorType.MAP);
+ }
+
+ @Override
+ public void endMap() {
+ getMapWriter().endMap();
+ }
+
+ @Override
+ public void startEntry() {
+ getMapWriter().startEntry();
+ }
+
+ @Override
+ public MapWriter key() {
+ return getMapWriter().key();
+ }
+
+ @Override
+ public MapWriter value() {
+ return getMapWriter().value();
+ }
+
+ @Override
+ public void endEntry() {
+ getMapWriter().endEntry();
+ }
+
+ private StructWriter getStructWriter() {
+ if (structWriter == null) {
+ structWriter = nullableStructWriterFactory.build(data.getStruct());
+ structWriter.setPosition(idx());
+ writers.add(structWriter);
+ }
+ return structWriter;
+ }
+
+ public StructWriter asStruct() {
+ data.setType(idx(), MinorType.STRUCT);
+ return getStructWriter();
+ }
+
+ private ListWriter getListWriter() {
+ if (listWriter == null) {
+ listWriter = new UnionListWriter(data.getList(), nullableStructWriterFactory);
+ listWriter.setPosition(idx());
+ writers.add(listWriter);
+ }
+ return listWriter;
+ }
+
+ public ListWriter asList() {
+ data.setType(idx(), MinorType.LIST);
+ return getListWriter();
+ }
+
+ private MapWriter getMapWriter() {
+ if (mapWriter == null) {
+ mapWriter = new UnionMapWriter(data.getMap(new ArrowType.Map(false)));
+ mapWriter.setPosition(idx());
+ writers.add(mapWriter);
+ }
+ return mapWriter;
+ }
+
+ private MapWriter getMapWriter(ArrowType arrowType) {
+ if (mapWriter == null) {
+ mapWriter = new UnionMapWriter(data.getMap(arrowType));
+ mapWriter.setPosition(idx());
+ writers.add(mapWriter);
+ }
+ return mapWriter;
+ }
+
+ public MapWriter asMap(ArrowType arrowType) {
+ data.setType(idx(), MinorType.MAP);
+ return getMapWriter(arrowType);
+ }
+
+ BaseWriter getWriter(MinorType minorType) {
+ return getWriter(minorType, null);
+ }
+
+ BaseWriter getWriter(MinorType minorType, ArrowType arrowType) {
+ switch (minorType) {
+ case STRUCT:
+ return getStructWriter();
+ case LIST:
+ return getListWriter();
+ case MAP:
+ return getMapWriter(arrowType);
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ return get${name}Writer(<#if minor.class?starts_with("Decimal") >arrowType</#if>);
+ </#if>
+ </#list>
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unknown type: " + minorType);
+ }
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+
+ private ${name}Writer ${name?uncap_first}Writer;
+
+ private ${name}Writer get${name}Writer(<#if minor.class?starts_with("Decimal")>ArrowType arrowType</#if>) {
+ if (${uncappedName}Writer == null) {
+ ${uncappedName}Writer = new ${name}WriterImpl(data.get${name}Vector(<#if minor.class?starts_with("Decimal")>arrowType</#if>));
+ ${uncappedName}Writer.setPosition(idx());
+ writers.add(${uncappedName}Writer);
+ }
+ return ${uncappedName}Writer;
+ }
+
+ public ${name}Writer as${name}(<#if minor.class?starts_with("Decimal")>ArrowType arrowType</#if>) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ return get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>);
+ }
+
+ @Override
+ public void write(${name}Holder holder) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ <#if minor.class?starts_with("Decimal")>ArrowType arrowType = new ArrowType.Decimal(holder.precision, holder.scale, ${name}Holder.WIDTH * 8);</#if>
+ get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>).setPosition(idx());
+ get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>).write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>).setPosition(idx());
+ get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>).write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ }
+ <#if minor.class?starts_with("Decimal")>
+ public void write${name}(${friendlyType} value) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ ArrowType arrowType = new ArrowType.Decimal(value.precision(), value.scale(), ${name}Vector.TYPE_WIDTH * 8);
+ get${name}Writer(arrowType).setPosition(idx());
+ get${name}Writer(arrowType).write${name}(value);
+ }
+
+ public void writeBigEndianBytesTo${name}(byte[] value, ArrowType arrowType) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ get${name}Writer(arrowType).setPosition(idx());
+ get${name}Writer(arrowType).writeBigEndianBytesTo${name}(value, arrowType);
+ }
+ </#if>
+ </#if>
+ </#list>
+ </#list>
+
+ public void writeNull() {
+ }
+
+ @Override
+ public StructWriter struct() {
+ data.setType(idx(), MinorType.LIST);
+ getListWriter().setPosition(idx());
+ return getListWriter().struct();
+ }
+
+ @Override
+ public ListWriter list() {
+ data.setType(idx(), MinorType.LIST);
+ getListWriter().setPosition(idx());
+ return getListWriter().list();
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().list(name);
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().struct(name);
+ }
+
+ @Override
+ public MapWriter map() {
+ data.setType(idx(), MinorType.MAP);
+ getListWriter().setPosition(idx());
+ return getListWriter().map();
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ data.setType(idx(), MinorType.MAP);
+ getListWriter().setPosition(idx());
+ return getListWriter().map(keysSorted);
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ data.setType(idx(), MinorType.MAP);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().map(name);
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ data.setType(idx(), MinorType.MAP);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().map(name, keysSorted);
+ }
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ @Override
+ public ${capName}Writer ${lowerName}(String name) {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().${lowerName}(name);
+ }
+
+ @Override
+ public ${capName}Writer ${lowerName}() {
+ data.setType(idx(), MinorType.LIST);
+ getListWriter().setPosition(idx());
+ return getListWriter().${lowerName}();
+ }
+ </#if>
+ <#if minor.class?starts_with("Decimal")>
+ @Override
+ public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}</#list>);
+ }
+ </#if>
+ </#list></#list>
+
+ @Override
+ public void allocate() {
+ data.allocateNew();
+ }
+
+ @Override
+ public void clear() {
+ data.clear();
+ }
+
+ @Override
+ public void close() throws Exception {
+ data.close();
+ }
+
+ @Override
+ public Field getField() {
+ return data.getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return data.getValueCapacity();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ValueHolders.java b/src/arrow/java/vector/src/main/codegen/templates/ValueHolders.java
new file mode 100644
index 000000000..973efd870
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ValueHolders.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+<@pp.dropOutputFile />
+<#list vv.modes as mode>
+<#list vv.types as type>
+<#list type.minor as minor>
+
+<#assign className="${mode.prefix}${minor.class}Holder" />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/holders/${className}.java" />
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.holders;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+public final class ${className} implements ValueHolder{
+
+ <#if mode.name == "Repeated">
+
+ /** The first index (inclusive) into the Vector. **/
+ public int start;
+
+ /** The last index (exclusive) into the Vector. **/
+ public int end;
+
+ /** The Vector holding the actual values. **/
+ public ${minor.class}Vector vector;
+
+ <#else>
+ public static final int WIDTH = ${type.width};
+
+ <#if mode.name == "Optional">public int isSet;
+ <#else>public final int isSet = 1;</#if>
+ <#assign fields = (minor.fields!type.fields) + (minor.typeParams![]) />
+ <#list fields as field>
+ public ${field.type} ${field.name};
+ </#list>
+
+ /**
+ * Reason for not supporting the operation is that ValueHolders are potential scalar
+ * replacements and hence we don't want any methods to be invoked on them.
+ */
+ public int hashCode(){
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Reason for not supporting the operation is that ValueHolders are potential scalar
+ * replacements and hence we don't want any methods to be invoked on them.
+ */
+ public String toString(){
+ throw new UnsupportedOperationException();
+ }
+ </#if>
+
+
+
+
+}
+
+</#list>
+</#list>
+</#list> \ No newline at end of file
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java
new file mode 100644
index 000000000..b41dbb245
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Tuple class containing a vector and whether is was created.
+ *
+ * @param <V> The type of vector the result is for.
+ */
+public class AddOrGetResult<V extends ValueVector> {
+ private final V vector;
+ private final boolean created;
+
+ /** Constructs a new object. */
+ public AddOrGetResult(V vector, boolean created) {
+ this.vector = Preconditions.checkNotNull(vector);
+ this.created = created;
+ }
+
+ /** Returns the vector. */
+ public V getVector() {
+ return vector;
+ }
+
+ /** Returns whether the vector is created. */
+ public boolean isCreated() {
+ return created;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java
new file mode 100644
index 000000000..6824756d8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.vector.complex.RepeatedFixedWidthVectorLike;
+import org.apache.arrow.vector.complex.RepeatedVariableWidthVectorLike;
+
+/** Helper utility methods for allocating storage for Vectors. */
+public class AllocationHelper {
+ private AllocationHelper() {}
+
+ /**
+ * Allocates the vector.
+ *
+ * @param v The vector to allocate.
+ * @param valueCount Number of values to allocate.
+ * @param bytesPerValue bytes per value.
+ * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory.
+ */
+ public static void allocate(ValueVector v, int valueCount, int bytesPerValue) {
+ allocate(v, valueCount, bytesPerValue, 5);
+ }
+
+ /**
+ * Allocates memory for a vector assuming given number of values and their width.
+ *
+ * @param v The vector the allocate.
+ * @param valueCount The number of elements to allocate.
+ * @param bytesPerValue The bytes per value to use for allocating underlying storage
+ * @param childValCount If <code>v</code> is a repeated vector, this is number of child elements to allocate.
+ * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory.
+ */
+ public static void allocatePrecomputedChildCount(
+ ValueVector v,
+ int valueCount,
+ int bytesPerValue,
+ int childValCount) {
+ if (v instanceof FixedWidthVector) {
+ ((FixedWidthVector) v).allocateNew(valueCount);
+ } else if (v instanceof VariableWidthVector) {
+ ((VariableWidthVector) v).allocateNew(valueCount * bytesPerValue, valueCount);
+ } else if (v instanceof RepeatedFixedWidthVectorLike) {
+ ((RepeatedFixedWidthVectorLike) v).allocateNew(valueCount, childValCount);
+ } else if (v instanceof RepeatedVariableWidthVectorLike) {
+ ((RepeatedVariableWidthVectorLike) v).allocateNew(childValCount * bytesPerValue, valueCount, childValCount);
+ } else {
+ v.allocateNew();
+ }
+ }
+
+ /**
+ * Allocates memory for a vector assuming given number of values and their width.
+ *
+ * @param v The vector the allocate.
+ * @param valueCount The number of elements to allocate.
+ * @param bytesPerValue The bytes per value to use for allocating underlying storage
+ * @param repeatedPerTop If <code>v</code> is a repeated vector, this is assumed number of elements per child.
+ * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory
+ */
+ public static void allocate(ValueVector v, int valueCount, int bytesPerValue, int repeatedPerTop) {
+ allocatePrecomputedChildCount(v, valueCount, bytesPerValue, repeatedPerTop * valueCount);
+ }
+
+ /**
+ * Allocates the exact amount if v is fixed width, otherwise falls back to dynamic allocation.
+ *
+ * @param v value vector we are trying to allocate
+ * @param valueCount size we are trying to allocate
+ * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory
+ */
+ public static void allocateNew(ValueVector v, int valueCount) {
+ if (v instanceof FixedWidthVector) {
+ ((FixedWidthVector) v).allocateNew(valueCount);
+ } else if (v instanceof VariableWidthVector) {
+ ((VariableWidthVector) v).allocateNew(valueCount);
+ } else {
+ v.allocateNew();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
new file mode 100644
index 000000000..ded58b22b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
@@ -0,0 +1,930 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * BaseFixedWidthVector provides an abstract interface for
+ * implementing vectors of fixed width values. The vectors are nullable
+ * implying that zero or more elements in the vector could be NULL.
+ */
+public abstract class BaseFixedWidthVector extends BaseValueVector
+ implements FixedWidthVector, FieldVector, VectorDefinitionSetter {
+ private final int typeWidth;
+
+ protected int lastValueCapacity;
+ protected int actualValueCapacity;
+
+ protected final Field field;
+ private int allocationMonitor;
+ protected ArrowBuf validityBuffer;
+ protected ArrowBuf valueBuffer;
+ protected int valueCount;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param field field materialized by this vector
+ * @param allocator The allocator to use for allocating memory for the vector.
+ * @param typeWidth The width in bytes of the type.
+ */
+ public BaseFixedWidthVector(Field field, final BufferAllocator allocator, final int typeWidth) {
+ super(allocator);
+ this.typeWidth = typeWidth;
+ this.field = field;
+ valueCount = 0;
+ allocationMonitor = 0;
+ validityBuffer = allocator.getEmpty();
+ valueBuffer = allocator.getEmpty();
+ lastValueCapacity = INITIAL_VALUE_ALLOCATION;
+ refreshValueCapacity();
+ }
+
+
+ public int getTypeWidth() {
+ return typeWidth;
+ }
+
+ @Override
+ public String getName() {
+ return field.getName();
+ }
+
+ /* TODO:
+ * see if getNullCount() can be made faster -- O(1)
+ */
+
+ /* TODO:
+ * Once the entire hierarchy has been refactored, move common functions
+ * like getNullCount(), splitAndTransferValidityBuffer to top level
+ * base class BaseValueVector.
+ *
+ * Along with this, some class members (validityBuffer) can also be
+ * abstracted out to top level base class.
+ *
+ * Right now BaseValueVector is the top level base class for other
+ * vector types in ValueVector hierarchy (non-nullable) and those
+ * vectors have not yet been refactored/removed so moving things to
+ * the top class as of now is not a good idea.
+ */
+
+ /**
+ * Get the memory address of buffer that manages the validity
+ * (NULL or NON-NULL nature) of elements in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getValidityBufferAddress() {
+ return (validityBuffer.memoryAddress());
+ }
+
+ /**
+ * Get the memory address of buffer that stores the data for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getDataBufferAddress() {
+ return (valueBuffer.memoryAddress());
+ }
+
+ /**
+ * Get the memory address of buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return starting address of the buffer
+ * @throws UnsupportedOperationException for fixed width vectors
+ */
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException("not supported for fixed-width vectors");
+ }
+
+ /**
+ * Get buffer that manages the validity (NULL or NON-NULL nature) of
+ * elements in the vector. Consider it as a buffer for internal bit vector
+ * data structure.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ /**
+ * Get the buffer that stores the data for elements in the vector.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getDataBuffer() {
+ return valueBuffer;
+ }
+
+ /**
+ * buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return buffer
+ * @throws UnsupportedOperationException for fixed width vectors
+ */
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException("not supported for fixed-width vectors");
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ */
+ @Override
+ public void setInitialCapacity(int valueCount) {
+ computeAndCheckBufferSize(valueCount);
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Get the current value capacity for the vector.
+ *
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ return actualValueCapacity;
+ }
+
+ /**
+ * Call this if you change the capacity of valueBuffer or validityBuffer.
+ */
+ protected void refreshValueCapacity() {
+ actualValueCapacity = Math.min(getValueBufferValueCapacity(), getValidityBufferValueCapacity());
+ }
+
+ protected int getValueBufferValueCapacity() {
+ return capAtMaxInt(valueBuffer.capacity() / typeWidth);
+ }
+
+ protected int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * zero out the vector and the data in associated buffers.
+ */
+ @Override
+ public void zeroVector() {
+ initValidityBuffer();
+ initValueBuffer();
+ }
+
+ /* zero out the validity buffer */
+ private void initValidityBuffer() {
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /* zero out the data buffer */
+ private void initValueBuffer() {
+ valueBuffer.setZero(0, valueBuffer.capacity());
+ }
+
+ /**
+ * Reset the vector to initial state. Same as {@link #zeroVector()}.
+ * Note that this method doesn't release any memory.
+ */
+ @Override
+ public void reset() {
+ valueCount = 0;
+ zeroVector();
+ }
+
+ /**
+ * Close the vector and release the associated buffers.
+ */
+ @Override
+ public void close() {
+ clear();
+ }
+
+ /**
+ * Same as {@link #close()}.
+ */
+ @Override
+ public void clear() {
+ valueCount = 0;
+ validityBuffer = releaseBuffer(validityBuffer);
+ valueBuffer = releaseBuffer(valueBuffer);
+ refreshValueCapacity();
+ }
+
+ /* used to step down the memory allocation */
+ protected void incrementAllocationMonitor() {
+ if (allocationMonitor < 0) {
+ allocationMonitor = 0;
+ }
+ allocationMonitor++;
+ }
+
+ /* used to step up the memory allocation */
+ protected void decrementAllocationMonitor() {
+ if (allocationMonitor > 0) {
+ allocationMonitor = 0;
+ }
+ allocationMonitor--;
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() {
+ allocateNew(lastValueCapacity);
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector. See {@link #allocateNew(int)} for allocating memory for specific
+ * number of elements in the vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ @Override
+ public boolean allocateNewSafe() {
+ try {
+ allocateNew(lastValueCapacity);
+ return true;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+
+ /**
+ * Allocate memory for the vector to support storing at least the provided number of
+ * elements in the vector. This method must be called prior to using the ValueVector.
+ *
+ * @param valueCount the desired number of elements in the vector
+ * @throws org.apache.arrow.memory.OutOfMemoryException on error
+ */
+ public void allocateNew(int valueCount) {
+ computeAndCheckBufferSize(valueCount);
+
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+
+ try {
+ allocateBytes(valueCount);
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ /*
+ * Compute the buffer size required for 'valueCount', and check if it's within bounds.
+ */
+ private long computeAndCheckBufferSize(int valueCount) {
+ final long size = computeCombinedBufferSize(valueCount, typeWidth);
+ if (size > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Memory required for vector capacity " +
+ valueCount +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ return size;
+ }
+
+ /**
+ * Actual memory allocation is done by this function. All the calculations
+ * and knowledge about what size to allocate is upto the callers of this
+ * method.
+ * Callers appropriately handle errors if memory allocation fails here.
+ * Callers should also take care of determining that desired size is
+ * within the bounds of max allocation allowed and any other error
+ * conditions.
+ */
+ private void allocateBytes(int valueCount) {
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount, typeWidth);
+ valueBuffer = buffers.getDataBuf();
+ validityBuffer = buffers.getValidityBuf();
+ zeroVector();
+
+ refreshValueCapacity();
+ lastValueCapacity = getValueCapacity();
+ }
+
+ /**
+ * During splitAndTransfer, if we splitting from a random position within a byte,
+ * we can't just slice the source buffer so we have to explicitly allocate the
+ * validityBuffer of the target vector. This is unlike the databuffer which we can
+ * always slice for the target vector.
+ */
+ private void allocateValidityBuffer(final int validityBufferSize) {
+ validityBuffer = allocator.buffer(validityBufferSize);
+ validityBuffer.readerIndex(0);
+ refreshValueCapacity();
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ * @param count desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int count) {
+ if (count == 0) {
+ return 0;
+ }
+ return (count * typeWidth) + getValidityBufferSizeFromCount(count);
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return (valueCount * typeWidth) + getValidityBufferSizeFromCount(valueCount);
+ }
+
+ /**
+ * Get information about how this field is materialized.
+ * @return the field corresponding to this vector
+ */
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final ArrowBuf[] buffers;
+ setReaderAndWriterIndex();
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ buffers = new ArrowBuf[2];
+ buffers[0] = validityBuffer;
+ buffers[1] = valueBuffer;
+ }
+ if (clear) {
+ for (final ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain(1);
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ @Override
+ public void reAlloc() {
+ int targetValueCount = getValueCapacity() * 2;
+ if (targetValueCount == 0) {
+ if (lastValueCapacity > 0) {
+ targetValueCount = lastValueCapacity;
+ } else {
+ targetValueCount = INITIAL_VALUE_ALLOCATION * 2;
+ }
+ }
+ computeAndCheckBufferSize(targetValueCount);
+
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetValueCount, typeWidth);
+ final ArrowBuf newValueBuffer = buffers.getDataBuf();
+ newValueBuffer.setBytes(0, valueBuffer, 0, valueBuffer.capacity());
+ newValueBuffer.setZero(valueBuffer.capacity(), newValueBuffer.capacity() - valueBuffer.capacity());
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = newValueBuffer;
+
+ final ArrowBuf newValidityBuffer = buffers.getValidityBuf();
+ newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity());
+ newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity());
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = newValidityBuffer;
+
+ refreshValueCapacity();
+ lastValueCapacity = getValueCapacity();
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ /**
+ * Initialize the children in schema for this Field. This operation is a
+ * NO-OP for scalar types since they don't have any children.
+ * @param children the schema
+ * @throws IllegalArgumentException if children is a non-empty list for scalar types.
+ */
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (!children.isEmpty()) {
+ throw new IllegalArgumentException("primitive type vector can not have children");
+ }
+ }
+
+ /**
+ * Get the inner child vectors.
+ * @return list of child vectors for complex types, empty list for scalar vector types
+ */
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 2) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf dataBuffer = ownBuffers.get(1);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator);
+ refreshValueCapacity();
+
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ *
+ * @return the inner buffers.
+ */
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(2);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(valueBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ valueBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ valueBuffer.writerIndex(0);
+ } else {
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ if (typeWidth == 0) {
+ /* specialized handling for BitVector */
+ valueBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ } else {
+ valueBuffer.writerIndex((long) valueCount * typeWidth);
+ }
+ }
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @param callBack not used
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return getTransferPair(ref, allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(getName(), allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator);
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ * @param target destination vector for transfer
+ */
+ public void transferTo(BaseFixedWidthVector target) {
+ compareTypes(target, "transferTo");
+ target.clear();
+ target.validityBuffer = transferBuffer(validityBuffer, target.allocator);
+ target.valueBuffer = transferBuffer(valueBuffer, target.allocator);
+ target.valueCount = valueCount;
+ target.refreshValueCapacity();
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ * @param target destination vector
+ */
+ public void splitAndTransferTo(int startIndex, int length,
+ BaseFixedWidthVector target) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ compareTypes(target, "splitAndTransferTo");
+ target.clear();
+ splitAndTransferValidityBuffer(startIndex, length, target);
+ splitAndTransferValueBuffer(startIndex, length, target);
+ target.setValueCount(length);
+ }
+
+ /**
+ * Data buffer can always be split and transferred using slicing.
+ */
+ private void splitAndTransferValueBuffer(int startIndex, int length,
+ BaseFixedWidthVector target) {
+ final int startPoint = startIndex * typeWidth;
+ final int sliceLength = length * typeWidth;
+ final ArrowBuf slicedBuffer = valueBuffer.slice(startPoint, sliceLength);
+ target.valueBuffer = transferBuffer(slicedBuffer, target.allocator);
+ target.refreshValueCapacity();
+ }
+
+ /**
+ * Validity buffer has multiple cases of split and transfer depending on
+ * the starting position of the source index.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length,
+ BaseFixedWidthVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ /* slice */
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ target.refreshValueCapacity();
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer,
+ firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | common getters and setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ @Override
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Get the value count of vector. This will always be zero unless
+ * {@link #setValueCount(int)} has been called prior to calling this.
+ *
+ * @return valueCount for the vector
+ */
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * Set value count for the vector.
+ *
+ * @param valueCount value count to set
+ */
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ final int currentValueCapacity = getValueCapacity();
+ while (valueCount > getValueCapacity()) {
+ reAlloc();
+ }
+ /*
+ * We are trying to understand the pattern of memory allocation.
+ * If initially, the user did vector.allocateNew(), we would have
+ * allocated memory of default size (4096 * type width).
+ * Later on user invokes setValueCount(count).
+ *
+ * If the existing value capacity is twice as large as the
+ * valueCount, we know that we over-provisioned memory in the
+ * first place when default memory allocation was done because user
+ * really needs a much less value count in the vector.
+ *
+ * We record this by bumping up the allocationMonitor. If this pattern
+ * happens for certain number of times and allocationMonitor
+ * reaches the threshold (internal hardcoded) value, subsequent
+ * call to allocateNew() will take care of stepping down the
+ * default memory allocation size.
+ *
+ * Another case would be under-provisioning the initial memory and
+ * thus going through a lot of realloc(). Here the goal is to
+ * see if we can minimize the number of reallocations. Again the
+ * state is recorded in allocationMonitor by decrementing it
+ * (negative value). If a threshold is hit, realloc will try to
+ * allocate more memory in order to possibly avoid a future realloc.
+ * This case is also applicable to setSafe() methods which can trigger
+ * a realloc() and thus we record the state there as well.
+ */
+ if (valueCount > 0) {
+ if (currentValueCapacity >= (valueCount * 2)) {
+ incrementAllocationMonitor();
+ } else if (currentValueCapacity <= (valueCount / 2)) {
+ decrementAllocationMonitor();
+ }
+ }
+ setReaderAndWriterIndex();
+ }
+
+ /**
+ * Check if the given index is within the current value capacity
+ * of the vector.
+ *
+ * @param index position to check
+ * @return true if index is within the current value capacity
+ */
+ public boolean isSafe(int index) {
+ return index < getValueCapacity();
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null, false otherwise
+ */
+ @Override
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Mark the particular position in the vector as non-null.
+ *
+ * @param index position of the element.
+ */
+ @Override
+ public void setIndexDefined(int index) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ public void set(int index, byte[] value, int start, int length) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void setSafe(int index, byte[] value, int start, int length) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void set(int index, ByteBuffer value, int start, int length) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void setSafe(int index, ByteBuffer value, int start, int length) {
+ throw new UnsupportedOperationException();
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | helper methods for setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ protected void handleSafe(int index) {
+ while (index >= getValueCapacity()) {
+ decrementAllocationMonitor();
+ reAlloc();
+ }
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector. The source vector should be of the same type as this one.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ BitVectorHelper.unsetBit(this.getValidityBuffer(), thisIndex);
+ } else {
+ BitVectorHelper.setBit(this.getValidityBuffer(), thisIndex);
+ PlatformDependent.copyMemory(from.getDataBuffer().memoryAddress() + (long) fromIndex * typeWidth,
+ this.getDataBuffer().memoryAddress() + (long) thisIndex * typeWidth, typeWidth);
+ }
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ handleSafe(thisIndex);
+ copyFrom(fromIndex, thisIndex, from);
+ }
+
+ /**
+ * Set the element at the given index to null.
+ *
+ * @param index position of element
+ */
+ public void setNull(int index) {
+ handleSafe(index);
+ // not really needed to set the bit to 0 as long as
+ // the buffer always starts from 0.
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index) {
+ return getDataPointer(index, new ArrowBufPointer());
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
+ if (isNull(index)) {
+ reuse.set(null, 0, 0);
+ } else {
+ reuse.set(valueBuffer, (long) index * typeWidth, typeWidth);
+ }
+ return reuse;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isNull(index)) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ long start = (long) typeWidth * index;
+ long end = (long) typeWidth * (index + 1);
+ return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java
new file mode 100644
index 000000000..556411c86
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface for all int type vectors.
+ */
+public interface BaseIntVector extends FieldVector {
+
+ /**
+ * Sets the value at index, note this value may need to be need truncated.
+ * Note this is safe version (i.e. call setSafe method in vector)
+ */
+ void setWithPossibleTruncate(int index, long value);
+
+ /**
+ * Sets the value at index, note this value may need to be need truncated.
+ * Note this is unsafe version (i.e. call set method in vector)
+ */
+ void setUnsafeWithPossibleTruncate(int index, long value);
+
+ /**
+ * Gets the value at index.
+ * This value may have been extended to long and will throw {@link NullPointerException}
+ * if the value is null. Note null check could be turned off via {@link NullCheckingForGet}.
+ */
+ long getValueAsLong(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
new file mode 100644
index 000000000..90694db83
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
@@ -0,0 +1,1370 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BaseLargeVariableWidthVector is a base class providing functionality for large strings/large bytes types.
+ */
+public abstract class BaseLargeVariableWidthVector extends BaseValueVector
+ implements VariableWidthVector, FieldVector, VectorDefinitionSetter {
+ private static final int DEFAULT_RECORD_BYTE_COUNT = 12;
+ private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT;
+ private int lastValueCapacity;
+ private long lastValueAllocationSizeInBytes;
+
+ /* protected members */
+ public static final int OFFSET_WIDTH = 8; /* 8 byte unsigned int to track offsets */
+ protected static final byte[] emptyByteArray = new byte[]{};
+ protected ArrowBuf validityBuffer;
+ protected ArrowBuf valueBuffer;
+ protected ArrowBuf offsetBuffer;
+ protected int valueCount;
+ protected int lastSet;
+ protected final Field field;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param field The field materialized by this vector.
+ * @param allocator The allocator to use for creating/resizing buffers
+ */
+ public BaseLargeVariableWidthVector(Field field, final BufferAllocator allocator) {
+ super(allocator);
+ this.field = field;
+ lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT;
+ // -1 because we require one extra slot for the offset array.
+ lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
+ valueCount = 0;
+ lastSet = -1;
+ offsetBuffer = allocator.getEmpty();
+ validityBuffer = allocator.getEmpty();
+ valueBuffer = allocator.getEmpty();
+ }
+
+ @Override
+ public String getName() {
+ return field.getName();
+ }
+
+ /**
+ * Get buffer that manages the validity (NULL or NON-NULL nature) of
+ * elements in the vector. Consider it as a buffer for internal bit vector
+ * data structure.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ /**
+ * Get the buffer that stores the data for elements in the vector.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getDataBuffer() {
+ return valueBuffer;
+ }
+
+ /**
+ * buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return offsetBuffer;
+ }
+
+ /**
+ * Get the memory address of buffer that stores the offsets for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getOffsetBufferAddress() {
+ return offsetBuffer.memoryAddress();
+ }
+
+ /**
+ * Get the memory address of buffer that manages the validity
+ * (NULL or NON-NULL nature) of elements in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getValidityBufferAddress() {
+ return validityBuffer.memoryAddress();
+ }
+
+ /**
+ * Get the memory address of buffer that stores the data for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getDataBufferAddress() {
+ return valueBuffer.memoryAddress();
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ */
+ @Override
+ public void setInitialCapacity(int valueCount) {
+ final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT;
+ checkDataBufferSize(size);
+ computeAndCheckOffsetsBufferSize(valueCount);
+ lastValueAllocationSizeInBytes = size;
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ * @param density average number of bytes per variable width element
+ */
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ long size = Math.max((long) (valueCount * density), 1L);
+ checkDataBufferSize(size);
+ computeAndCheckOffsetsBufferSize(valueCount);
+ lastValueAllocationSizeInBytes = size;
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Get the density of this ListVector.
+ * @return density
+ */
+ public double getDensity() {
+ if (valueCount == 0) {
+ return 0.0D;
+ }
+ final long startOffset = offsetBuffer.getLong(0);
+ final long endOffset = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH);
+ final double totalListSize = endOffset - startOffset;
+ return totalListSize / valueCount;
+ }
+
+ /**
+ * Get the current capacity which does not exceed either validity buffer or offset buffer.
+ * Note: Here the `getValueCapacity` has no relationship with the value buffer.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ final long offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ return capAtMaxInt(Math.min(offsetValueCapacity, getValidityBufferValueCapacity()));
+ }
+
+ private long getValidityBufferValueCapacity() {
+ return validityBuffer.capacity() * 8;
+ }
+
+ private long getOffsetBufferValueCapacity() {
+ return offsetBuffer.capacity() / OFFSET_WIDTH;
+ }
+
+ /**
+ * zero out the vector and the data in associated buffers.
+ */
+ public void zeroVector() {
+ initValidityBuffer();
+ initOffsetBuffer();
+ valueBuffer.setZero(0, valueBuffer.capacity());
+ }
+
+ /* zero out the validity buffer */
+ private void initValidityBuffer() {
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /* zero out the offset buffer */
+ private void initOffsetBuffer() {
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+ /**
+ * Reset the vector to initial state. Same as {@link #zeroVector()}.
+ * Note that this method doesn't release any memory.
+ */
+ public void reset() {
+ zeroVector();
+ lastSet = -1;
+ valueCount = 0;
+ }
+
+ /**
+ * Close the vector and release the associated buffers.
+ */
+ @Override
+ public void close() {
+ clear();
+ }
+
+ /**
+ * Same as {@link #close()}.
+ */
+ @Override
+ public void clear() {
+ validityBuffer = releaseBuffer(validityBuffer);
+ valueBuffer = releaseBuffer(valueBuffer);
+ offsetBuffer = releaseBuffer(offsetBuffer);
+ lastSet = -1;
+ valueCount = 0;
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Override
+ @Deprecated
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ /**
+ * Initialize the children in schema for this Field. This operation is a
+ * NO-OP for scalar types since they don't have any children.
+ * @param children the schema
+ * @throws IllegalArgumentException if children is a non-empty list for scalar types.
+ */
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (!children.isEmpty()) {
+ throw new IllegalArgumentException("primitive type vector can not have children");
+ }
+ }
+
+ /**
+ * Get the inner child vectors.
+ * @return list of child vectors for complex types, empty list for scalar vector types
+ */
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return Collections.emptyList();
+ }
+
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf offBuffer = ownBuffers.get(1);
+ ArrowBuf dataBuffer = ownBuffers.get(2);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator);
+
+ lastSet = fieldNode.getLength() - 1;
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ * @return the inner buffers.
+ */
+ public List<ArrowBuf> getFieldBuffers() {
+ // before flight/IPC, we must bring the vector to a consistent state.
+ // this is because, it is possible that the offset buffers of some trailing values
+ // are not updated. this may cause some data in the data buffer being lost.
+ // for details, please see TestValueVector#testUnloadVariableWidthVector.
+ fillHoles(valueCount);
+
+ List<ArrowBuf> result = new ArrayList<>(3);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(offsetBuffer);
+ result.add(valueBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ offsetBuffer.readerIndex(0);
+ valueBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ offsetBuffer.writerIndex(0);
+ valueBuffer.writerIndex(0);
+ } else {
+ final long lastDataOffset = getStartOffset(valueCount);
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
+ valueBuffer.writerIndex(lastDataOffset);
+ }
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() {
+ allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector. See {@link #allocateNew(long, int)} for allocating memory for specific
+ * number of elements in the vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ @Override
+ public boolean allocateNewSafe() {
+ try {
+ allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+ return true;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+
+ /**
+ * Allocate memory for the vector to support storing at least the provided number of
+ * elements in the vector. This method must be called prior to using the ValueVector.
+ *
+ * @param totalBytes desired total memory capacity
+ * @param valueCount the desired number of elements in the vector
+ * @throws org.apache.arrow.memory.OutOfMemoryException if memory allocation fails
+ */
+ @Override
+ public void allocateNew(long totalBytes, int valueCount) {
+ assert totalBytes >= 0;
+
+ checkDataBufferSize(totalBytes);
+ computeAndCheckOffsetsBufferSize(valueCount);
+
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+
+ try {
+ allocateBytes(totalBytes, valueCount);
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ @Override
+ public void allocateNew(int valueCount) {
+ allocateNew(lastValueAllocationSizeInBytes, valueCount);
+ }
+
+ /* Check if the data buffer size is within bounds. */
+ private void checkDataBufferSize(long size) {
+ if (size > MAX_ALLOCATION_SIZE || size < 0) {
+ throw new OversizedAllocationException("Memory required for vector " +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ }
+
+ /**
+ * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's
+ * within bounds.
+ */
+ private long computeAndCheckOffsetsBufferSize(int valueCount) {
+ /* to track the end offset of last data element in vector, we need
+ * an additional slot in offset buffer.
+ */
+ final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH);
+ if (size > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Memory required for vector capacity " +
+ valueCount +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ return size;
+ }
+
+ /* allocate the inner buffers */
+ private void allocateBytes(final long valueBufferSize, final int valueCount) {
+ /* allocate data buffer */
+ long curSize = valueBufferSize;
+ valueBuffer = allocator.buffer(curSize);
+ valueBuffer.readerIndex(0);
+
+ /* allocate offset buffer and validity buffer */
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH);
+ offsetBuffer = buffers.getDataBuf();
+ validityBuffer = buffers.getValidityBuf();
+ initOffsetBuffer();
+ initValidityBuffer();
+
+ lastValueCapacity = getValueCapacity();
+ lastValueAllocationSizeInBytes = capAtMaxInt(valueBuffer.capacity());
+ }
+
+ /* allocate offset buffer */
+ private void allocateOffsetBuffer(final long size) {
+ offsetBuffer = allocator.buffer(size);
+ offsetBuffer.readerIndex(0);
+ initOffsetBuffer();
+ }
+
+ /* allocate validity buffer */
+ private void allocateValidityBuffer(final long size) {
+ validityBuffer = allocator.buffer(size);
+ validityBuffer.readerIndex(0);
+ initValidityBuffer();
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ public void reAlloc() {
+ reallocDataBuffer();
+ reallocValidityAndOffsetBuffers();
+ }
+
+ /**
+ * Reallocate the data buffer. Data Buffer stores the actual data for
+ * LARGEVARCHAR or LARGEVARBINARY elements in the vector. The behavior is to double
+ * the size of buffer.
+ * @throws OversizedAllocationException if the desired new size is more than
+ * max allowed
+ * @throws OutOfMemoryException if the internal memory allocation fails
+ */
+ public void reallocDataBuffer() {
+ final long currentBufferCapacity = valueBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (lastValueAllocationSizeInBytes > 0) {
+ newAllocationSize = lastValueAllocationSizeInBytes;
+ } else {
+ newAllocationSize = INITIAL_BYTE_COUNT * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ checkDataBufferSize(newAllocationSize);
+
+ final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+ newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = newBuf;
+ lastValueAllocationSizeInBytes = valueBuffer.capacity();
+ }
+
+ /**
+ * Reallocate the validity and offset buffers for this vector. Validity
+ * buffer is used to track the NULL or NON-NULL nature of elements in
+ * the vector and offset buffer is used to store the lengths of variable
+ * width elements in the vector.
+ *
+ * <p>Note that data buffer for variable length vectors moves independent
+ * of the companion validity and offset buffers. This is in
+ * contrast to what we have for fixed width vectors.
+ *
+ * <p>So even though we may have setup an initial capacity of 1024
+ * elements in the vector, it is quite possible
+ * that we need to reAlloc() the data buffer when we are setting
+ * the 5th element in the vector simply because previous
+ * variable length elements have exhausted the buffer capacity.
+ * However, we really don't need to reAlloc() validity and
+ * offset buffers until we try to set the 1025th element
+ * This is why we do a separate check for safe methods to
+ * determine which buffer needs reallocation.
+ * @throws OversizedAllocationException if the desired new size is more than
+ * max allowed
+ * @throws OutOfMemoryException if the internal memory allocation fails
+ */
+ public void reallocValidityAndOffsetBuffers() {
+ int targetOffsetCount = capAtMaxInt((offsetBuffer.capacity() / OFFSET_WIDTH) * 2);
+ if (targetOffsetCount == 0) {
+ if (lastValueCapacity > 0) {
+ targetOffsetCount = (lastValueCapacity + 1);
+ } else {
+ targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1);
+ }
+ }
+ computeAndCheckOffsetsBufferSize(targetOffsetCount);
+
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH);
+ final ArrowBuf newOffsetBuffer = buffers.getDataBuf();
+ newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity());
+ newOffsetBuffer.setZero(offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity());
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = newOffsetBuffer;
+
+ final ArrowBuf newValidityBuffer = buffers.getValidityBuf();
+ newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity());
+ newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity());
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = newValidityBuffer;
+
+ lastValueCapacity = getValueCapacity();
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying data buffer.
+ * @return number of bytes in the data buffer
+ */
+ @Override
+ public int getByteCapacity() {
+ return capAtMaxInt(valueBuffer.capacity());
+ }
+
+ @Override
+ public int sizeOfValueBuffer() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return capAtMaxInt(offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH));
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ return getBufferSizeFor(this.valueCount);
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ * @param valueCount desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ final long validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ final long offsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH;
+ /* get the end offset for this valueCount */
+ final long dataBufferSize = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH);
+ return capAtMaxInt(validityBufferSize + offsetBufferSize + dataBufferSize);
+ }
+
+ /**
+ * Get information about how this field is materialized.
+ * @return the field corresponding to this vector
+ */
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link io.netty.buffer.ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final ArrowBuf[] buffers;
+ setReaderAndWriterIndex();
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ buffers = new ArrowBuf[3];
+ buffers[0] = validityBuffer;
+ buffers[1] = offsetBuffer;
+ buffers[2] = valueBuffer;
+ }
+ if (clear) {
+ for (final ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @param callBack not used
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return getTransferPair(ref, allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(getName(), allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator);
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ * @param target destination vector for transfer
+ */
+ public void transferTo(BaseLargeVariableWidthVector target) {
+ compareTypes(target, "transferTo");
+ target.clear();
+ target.validityBuffer = transferBuffer(validityBuffer, target.allocator);
+ target.valueBuffer = transferBuffer(valueBuffer, target.allocator);
+ target.offsetBuffer = transferBuffer(offsetBuffer, target.allocator);
+ target.setLastSet(this.lastSet);
+ if (this.valueCount > 0) {
+ target.setValueCount(this.valueCount);
+ }
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ * @param target destination vector
+ */
+ public void splitAndTransferTo(int startIndex, int length,
+ BaseLargeVariableWidthVector target) {
+ Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount,
+ "Invalid startIndex: %s", startIndex);
+ Preconditions.checkArgument(startIndex + length <= valueCount,
+ "Invalid length: %s", length);
+ compareTypes(target, "splitAndTransferTo");
+ target.clear();
+ splitAndTransferValidityBuffer(startIndex, length, target);
+ splitAndTransferOffsetBuffer(startIndex, length, target);
+ target.setLastSet(length - 1);
+ if (length > 0) {
+ target.setValueCount(length);
+ }
+ }
+
+ /**
+ * Transfer the offsets along with data. Unlike the data buffer, we cannot simply
+ * slice the offset buffer for split and transfer. The reason is that offsets
+ * in the target vector have to be adjusted and made relative to the staring
+ * offset in source vector from the start index of split. This is why, we
+ * need to explicitly allocate the offset buffer and set the adjusted offsets
+ * in the target vector.
+ */
+ private void splitAndTransferOffsetBuffer(int startIndex, int length, BaseLargeVariableWidthVector target) {
+ final long start = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH);
+ final long end = offsetBuffer.getLong((long) (startIndex + length) * OFFSET_WIDTH);
+ final long dataLength = end - start;
+ target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH);
+ for (int i = 0; i < length + 1; i++) {
+ final long relativeSourceOffset = offsetBuffer.getLong((long) (startIndex + i) * OFFSET_WIDTH) - start;
+ target.offsetBuffer.setLong((long) i * OFFSET_WIDTH, relativeSourceOffset);
+ }
+ final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength);
+ target.valueBuffer = transferBuffer(slicedBuffer, target.allocator);
+ }
+
+ /*
+ * Transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length,
+ BaseLargeVariableWidthVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain();
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | common getters and setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Check if the given index is within the current value capacity
+ * of the vector.
+ *
+ * @param index position to check
+ * @return true if index is within the current value capacity
+ */
+ public boolean isSafe(int index) {
+ return index < getValueCapacity();
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null
+ */
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the value count of vector. This will always be zero unless
+ * setValueCount(int) has been called prior to calling this.
+ *
+ * @return valueCount for the vector
+ */
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * Sets the value count for the vector.
+ *
+ * @param valueCount value count
+ */
+ public void setValueCount(int valueCount) {
+ assert valueCount >= 0;
+ this.valueCount = valueCount;
+ while (valueCount > getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ fillHoles(valueCount);
+ lastSet = valueCount - 1;
+ setReaderAndWriterIndex();
+ }
+
+ /**
+ * Create holes in the vector upto the given index (exclusive).
+ * Holes will be created from the current last set position in
+ * the vector.
+ *
+ * @param index target index
+ */
+ public void fillEmpties(int index) {
+ handleSafe(index, emptyByteArray.length);
+ fillHoles(index);
+ lastSet = index - 1;
+ }
+
+ /**
+ * Set the index of last non-null element in the vector.
+ * It is important to call this method with appropriate value
+ * before calling {@link #setValueCount(int)}.
+ *
+ * @param value desired index of last non-null element.
+ */
+ public void setLastSet(int value) {
+ lastSet = value;
+ }
+
+ /**
+ * Get the index of last non-null element in the vector.
+ *
+ * @return index of the last non-null element
+ */
+ public int getLastSet() {
+ return lastSet;
+ }
+
+ /**
+ * Mark the particular position in the vector as non-null.
+ *
+ * @param index position of the element.
+ */
+ @Override
+ public void setIndexDefined(int index) {
+ // We need to check and realloc both validity and offset buffer
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ /**
+ * Sets the value length for an element.
+ *
+ * @param index position of the element to set
+ * @param length length of the element
+ */
+ public void setValueLengthSafe(int index, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ lastSet = index;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return greater than 0 length for non-null element, 0 otherwise
+ */
+ public int getValueLength(int index) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ return 0;
+ }
+ final long startOffset = getStartOffset(index);
+ final int dataLength =
+ (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - startOffset);
+ return dataLength;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the supplied
+ * byte array. This is same as using {@link #set(int, byte[], int, int)}
+ * with start as 0 and length as value.length
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ */
+ public void set(int index, byte[] value) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, 0, value.length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, byte[])} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ */
+ public void setSafe(int index, byte[] value) {
+ assert index >= 0;
+ handleSafe(index, value.length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, 0, value.length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the supplied
+ * byte array.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ * @param start start index in array of bytes
+ * @param length length of data in array of bytes
+ */
+ public void set(int index, byte[] value, int start, int length) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, byte[], int, int)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ * @param start start index in array of bytes
+ * @param length length of data in array of bytes
+ */
+ public void setSafe(int index, byte[] value, int start, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the
+ * content in supplied ByteBuffer.
+ *
+ * @param index position of the element to set
+ * @param value ByteBuffer with data
+ * @param start start index in ByteBuffer
+ * @param length length of data in ByteBuffer
+ */
+ public void set(int index, ByteBuffer value, int start, int length) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ valueBuffer.setBytes(startOffset, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value ByteBuffer with data
+ * @param start start index in ByteBuffer
+ * @param length length of data in ByteBuffer
+ */
+ public void setSafe(int index, ByteBuffer value, int start, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ valueBuffer.setBytes(startOffset, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the element at the given index to null.
+ *
+ * @param index position of element
+ */
+ public void setNull(int index) {
+ // We need to check and realloc both validity and offset buffer
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of data in buffer
+ * @param end end position of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void set(int index, int isSet, long start, long end, ArrowBuf buffer) {
+ assert index >= 0;
+ final long dataLength = end - start;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+ final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, end);
+ valueBuffer.setBytes(startOffset, buffer, start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, int, long, long, ArrowBuf)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of data in buffer
+ * @param end end position of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void setSafe(int index, int isSet, long start, long end, ArrowBuf buffer) {
+ assert index >= 0;
+ final long dataLength = end - start;
+ handleSafe(index, (int) dataLength);
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+ final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, buffer, start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param start start position of data in buffer
+ * @param length length of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void set(int index, long start, int length, ArrowBuf buffer) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ final ArrowBuf bb = buffer.slice(start, length);
+ valueBuffer.setBytes(startOffset, bb);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, int, long, long, ArrowBuf)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param start start position of data in buffer
+ * @param length length of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void setSafe(int index, long start, int length, ArrowBuf buffer) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ final ArrowBuf bb = buffer.slice(start, length);
+ valueBuffer.setBytes(startOffset, bb);
+ lastSet = index;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | helper methods for setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ protected final void fillHoles(int index) {
+ for (int i = lastSet + 1; i < index; i++) {
+ setBytes(i, emptyByteArray, 0, emptyByteArray.length);
+ }
+ lastSet = index - 1;
+ }
+
+ protected final void setBytes(int index, byte[] value, int start, int length) {
+ /* end offset of current last element in the vector. this will
+ * be the start offset of new element we are trying to store.
+ */
+ final long startOffset = getStartOffset(index);
+ /* set new end offset */
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ /* store the var length data in value buffer */
+ valueBuffer.setBytes(startOffset, value, start, length);
+ }
+
+ /**
+ * Gets the starting offset of a record, given its index.
+ * @param index index of the record.
+ * @return the starting offset of the record.
+ */
+ protected final long getStartOffset(int index) {
+ return offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ }
+
+ protected final void handleSafe(int index, int dataLength) {
+ /*
+ * IMPORTANT:
+ * value buffer for variable length vectors moves independent
+ * of the companion validity and offset buffers. This is in
+ * contrast to what we have for fixed width vectors.
+ *
+ * Here there is no concept of getValueCapacity() in the
+ * data stream. getValueCapacity() is applicable only to validity
+ * and offset buffers.
+ *
+ * So even though we may have setup an initial capacity of 1024
+ * elements in the vector, it is quite possible
+ * that we need to reAlloc() the data buffer when we are setting
+ * the 5th element in the vector simply because previous
+ * variable length elements have exhausted the buffer capacity.
+ * However, we really don't need to reAlloc() validity and
+ * offset buffers until we try to set the 1025th element
+ * This is why we do a separate check for safe methods to
+ * determine which buffer needs reallocation.
+ */
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ final long startOffset = lastSet < 0 ? 0L : getStartOffset(lastSet + 1);
+ while (valueBuffer.capacity() < (startOffset + dataLength)) {
+ reallocDataBuffer();
+ }
+ }
+
+ /**
+ * Method used by Json Writer to read a variable width element from
+ * the variable width vector and write to Json.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param data buffer storing the variable width vector elements
+ * @param offset buffer storing the offsets of variable width vector elements
+ * @param index position of the element in the vector
+ * @return array of bytes
+ */
+ public static byte[] get(final ArrowBuf data, final ArrowBuf offset, int index) {
+ final long currentStartOffset = offset.getLong((long) index * OFFSET_WIDTH);
+ final int dataLength =
+ (int) (offset.getLong((long) (index + 1) * OFFSET_WIDTH) - currentStartOffset);
+ final byte[] result = new byte[dataLength];
+ data.getBytes(currentStartOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Method used by Json Reader to explicitly set the offsets of the variable
+ * width vector data. The method takes care of allocating the memory for
+ * offsets if the caller hasn't done so.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer ArrowBuf to store offsets for variable width elements
+ * @param allocator memory allocator
+ * @param valueCount number of elements
+ * @param index position of the element
+ * @param value offset of the element
+ * @return buffer holding the offsets
+ */
+ public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator,
+ int valueCount, int index, long value) {
+ if (buffer == null) {
+ buffer = allocator.buffer((long) valueCount * OFFSET_WIDTH);
+ }
+ buffer.setLong((long) index * OFFSET_WIDTH, value);
+ if (index == (valueCount - 1)) {
+ buffer.writerIndex((long) valueCount * OFFSET_WIDTH);
+ }
+
+ return buffer;
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ fillHoles(thisIndex);
+ BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
+ final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
+ } else {
+ final long start = from.getOffsetBuffer().getLong((long) fromIndex * OFFSET_WIDTH);
+ final long end = from.getOffsetBuffer().getLong((long) (fromIndex + 1) * OFFSET_WIDTH);
+ final long length = end - start;
+ fillHoles(thisIndex);
+ BitVectorHelper.setBit(this.validityBuffer, thisIndex);
+ final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH);
+ from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, (int) length);
+ offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length);
+ }
+ lastSet = thisIndex;
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ handleSafe(thisIndex, 0);
+ fillHoles(thisIndex);
+ BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
+ final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
+ } else {
+ final long start = from.getOffsetBuffer().getLong((long) fromIndex * OFFSET_WIDTH);
+ final long end = from.getOffsetBuffer().getLong((long) (fromIndex + 1) * OFFSET_WIDTH);
+ final int length = (int) (end - start);
+ handleSafe(thisIndex, length);
+ fillHoles(thisIndex);
+ BitVectorHelper.setBit(this.validityBuffer, thisIndex);
+ final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH);
+ from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length);
+ offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length);
+ }
+ lastSet = thisIndex;
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index) {
+ return getDataPointer(index, new ArrowBufPointer());
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
+ if (isNull(index)) {
+ reuse.set(null, 0, 0);
+ } else {
+ long offset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ int length = (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - offset);
+ reuse.set(valueBuffer, offset, length);
+ }
+ return reuse;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isNull(index)) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ final long start = getStartOffset(index);
+ final long end = getStartOffset(index + 1);
+ return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
new file mode 100644
index 000000000..22fe4254f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base class for other Arrow Vector Types. Provides basic functionality around
+ * memory management.
+ */
+public abstract class BaseValueVector implements ValueVector {
+ private static final Logger logger = LoggerFactory.getLogger(BaseValueVector.class);
+
+ public static final String MAX_ALLOCATION_SIZE_PROPERTY = "arrow.vector.max_allocation_bytes";
+ public static final long MAX_ALLOCATION_SIZE = Long.getLong(MAX_ALLOCATION_SIZE_PROPERTY, Long.MAX_VALUE);
+ /*
+ * For all fixed width vectors, the value and validity buffers are sliced from a single buffer.
+ * Similarly, for variable width vectors, the offsets and validity buffers are sliced from a
+ * single buffer. To ensure the single buffer is power-of-2 size, the initial value allocation
+ * should be less than power-of-2. For IntVectors, this comes to 3970*4 (15880) for the data
+ * buffer and 504 bytes for the validity buffer, totalling to 16384 (2^16).
+ */
+ public static final int INITIAL_VALUE_ALLOCATION = 3970;
+
+ protected final BufferAllocator allocator;
+
+ protected BaseValueVector(BufferAllocator allocator) {
+ this.allocator = Preconditions.checkNotNull(allocator, "allocator cannot be null");
+ }
+
+ @Override
+ public abstract String getName();
+
+ /**
+ * Representation of vector suitable for debugging.
+ */
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount());
+ }
+
+ @Override
+ public void clear() {
+ }
+
+ @Override
+ public void close() {
+ clear();
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(getName(), allocator);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.emptyIterator();
+ }
+
+ /**
+ * Checks to ensure that every buffer <code>vv</code> uses
+ * has a positive reference count, throws if this precondition
+ * isn't met. Returns true otherwise.
+ */
+ public static boolean checkBufRefs(final ValueVector vv) {
+ for (final ArrowBuf buffer : vv.getBuffers(false)) {
+ if (buffer.refCnt() <= 0) {
+ throw new IllegalStateException("zero refcount");
+ }
+ }
+
+ return true;
+ }
+
+ @Override
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ void compareTypes(BaseValueVector target, String caller) {
+ if (this.getMinorType() != target.getMinorType()) {
+ throw new UnsupportedOperationException(caller + " should have vectors of exact same type");
+ }
+ }
+
+ protected ArrowBuf releaseBuffer(ArrowBuf buffer) {
+ buffer.getReferenceManager().release();
+ buffer = allocator.getEmpty();
+ return buffer;
+ }
+
+ /* number of bytes for the validity buffer for the given valueCount */
+ protected static int getValidityBufferSizeFromCount(final int valueCount) {
+ return DataSizeRoundingUtil.divideBy8Ceil(valueCount);
+ }
+
+ /* round up bytes for the validity buffer for the given valueCount */
+ private static long roundUp8ForValidityBuffer(long valueCount) {
+ return ((valueCount + 63) >> 6) << 3;
+ }
+
+ long computeCombinedBufferSize(int valueCount, int typeWidth) {
+ Preconditions.checkArgument(valueCount >= 0, "valueCount must be >= 0");
+ Preconditions.checkArgument(typeWidth >= 0, "typeWidth must be >= 0");
+
+ // compute size of validity buffer.
+ long bufferSize = roundUp8ForValidityBuffer(valueCount);
+
+ // add the size of the value buffer.
+ if (typeWidth == 0) {
+ // for boolean type, value-buffer and validity-buffer are of same size.
+ bufferSize *= 2;
+ } else {
+ bufferSize += DataSizeRoundingUtil.roundUpTo8Multiple((long) valueCount * typeWidth);
+ }
+ return allocator.getRoundingPolicy().getRoundedSize(bufferSize);
+ }
+
+ /**
+ * Container for primitive vectors (1 for the validity bit-mask and one to hold the values).
+ */
+ class DataAndValidityBuffers {
+ private ArrowBuf dataBuf;
+ private ArrowBuf validityBuf;
+
+ DataAndValidityBuffers(ArrowBuf dataBuf, ArrowBuf validityBuf) {
+ this.dataBuf = dataBuf;
+ this.validityBuf = validityBuf;
+ }
+
+ ArrowBuf getDataBuf() {
+ return dataBuf;
+ }
+
+ ArrowBuf getValidityBuf() {
+ return validityBuf;
+ }
+ }
+
+ DataAndValidityBuffers allocFixedDataAndValidityBufs(int valueCount, int typeWidth) {
+ long bufferSize = computeCombinedBufferSize(valueCount, typeWidth);
+ assert bufferSize <= MAX_ALLOCATION_SIZE;
+
+ long validityBufferSize;
+ long dataBufferSize;
+ if (typeWidth == 0) {
+ validityBufferSize = dataBufferSize = bufferSize / 2;
+ } else {
+ // Due to the rounding policy, the bufferSize could be greater than the
+ // requested size. Utilize the allocated buffer fully.;
+ long actualCount = (long) ((bufferSize * 8.0) / (8 * typeWidth + 1));
+ do {
+ validityBufferSize = roundUp8ForValidityBuffer(actualCount);
+ dataBufferSize = DataSizeRoundingUtil.roundUpTo8Multiple(actualCount * typeWidth);
+ if (validityBufferSize + dataBufferSize <= bufferSize) {
+ break;
+ }
+ --actualCount;
+ }
+ while (true);
+ }
+
+
+ /* allocate combined buffer */
+ ArrowBuf combinedBuffer = allocator.buffer(bufferSize);
+
+ /* slice into requested lengths */
+ ArrowBuf dataBuf = null;
+ ArrowBuf validityBuf = null;
+ long bufferOffset = 0;
+ for (int numBuffers = 0; numBuffers < 2; ++numBuffers) {
+ long len = (numBuffers == 0 ? dataBufferSize : validityBufferSize);
+ ArrowBuf buf = combinedBuffer.slice(bufferOffset, len);
+ buf.getReferenceManager().retain();
+ buf.readerIndex(0);
+ buf.writerIndex(0);
+
+ bufferOffset += len;
+ if (numBuffers == 0) {
+ dataBuf = buf;
+ } else {
+ validityBuf = buf;
+ }
+ }
+ combinedBuffer.getReferenceManager().release();
+ return new DataAndValidityBuffers(dataBuf, validityBuf);
+ }
+
+ public static ArrowBuf transferBuffer(final ArrowBuf srcBuffer, final BufferAllocator targetAllocator) {
+ final ReferenceManager referenceManager = srcBuffer.getReferenceManager();
+ return referenceManager.transferOwnership(srcBuffer, targetAllocator).getTransferredBuffer();
+ }
+
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
new file mode 100644
index 000000000..866dd9e21
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -0,0 +1,1410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BaseVariableWidthVector is a base class providing functionality for strings/bytes types.
+ */
+public abstract class BaseVariableWidthVector extends BaseValueVector
+ implements VariableWidthVector, FieldVector, VectorDefinitionSetter {
+ private static final int DEFAULT_RECORD_BYTE_COUNT = 8;
+ private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT;
+ private int lastValueCapacity;
+ private long lastValueAllocationSizeInBytes;
+
+ /* protected members */
+ public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track offsets */
+ protected static final byte[] emptyByteArray = new byte[]{};
+ protected ArrowBuf validityBuffer;
+ protected ArrowBuf valueBuffer;
+ protected ArrowBuf offsetBuffer;
+ protected int valueCount;
+ protected int lastSet;
+ protected final Field field;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param field The field materialized by this vector.
+ * @param allocator The allocator to use for creating/resizing buffers
+ */
+ public BaseVariableWidthVector(Field field, final BufferAllocator allocator) {
+ super(allocator);
+ this.field = field;
+ lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT;
+ // -1 because we require one extra slot for the offset array.
+ lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
+ valueCount = 0;
+ lastSet = -1;
+ offsetBuffer = allocator.getEmpty();
+ validityBuffer = allocator.getEmpty();
+ valueBuffer = allocator.getEmpty();
+ }
+
+ @Override
+ public String getName() {
+ return field.getName();
+ }
+
+ /* TODO:
+ * see if getNullCount() can be made faster -- O(1)
+ */
+
+ /* TODO:
+ * Once the entire hierarchy has been refactored, move common functions
+ * like getNullCount(), splitAndTransferValidityBuffer to top level
+ * base class BaseValueVector.
+ *
+ * Along with this, some class members (validityBuffer) can also be
+ * abstracted out to top level base class.
+ *
+ * Right now BaseValueVector is the top level base class for other
+ * vector types in ValueVector hierarchy (non-nullable) and those
+ * vectors have not yet been refactored/removed so moving things to
+ * the top class as of now is not a good idea.
+ */
+
+ /**
+ * Get buffer that manages the validity (NULL or NON-NULL nature) of
+ * elements in the vector. Consider it as a buffer for internal bit vector
+ * data structure.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ /**
+ * Get the buffer that stores the data for elements in the vector.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getDataBuffer() {
+ return valueBuffer;
+ }
+
+ /**
+ * buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return offsetBuffer;
+ }
+
+ /**
+ * Get the memory address of buffer that stores the offsets for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getOffsetBufferAddress() {
+ return offsetBuffer.memoryAddress();
+ }
+
+ /**
+ * Get the memory address of buffer that manages the validity
+ * (NULL or NON-NULL nature) of elements in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getValidityBufferAddress() {
+ return validityBuffer.memoryAddress();
+ }
+
+ /**
+ * Get the memory address of buffer that stores the data for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getDataBufferAddress() {
+ return valueBuffer.memoryAddress();
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ */
+ @Override
+ public void setInitialCapacity(int valueCount) {
+ final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT;
+ checkDataBufferSize(size);
+ computeAndCheckOffsetsBufferSize(valueCount);
+ lastValueAllocationSizeInBytes = (int) size;
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ * @param density average number of bytes per variable width element
+ */
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ long size = Math.max((long) (valueCount * density), 1L);
+ checkDataBufferSize(size);
+ computeAndCheckOffsetsBufferSize(valueCount);
+ lastValueAllocationSizeInBytes = (int) size;
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Get the density of this ListVector.
+ * @return density
+ */
+ public double getDensity() {
+ if (valueCount == 0) {
+ return 0.0D;
+ }
+ final int startOffset = offsetBuffer.getInt(0);
+ final int endOffset = offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH);
+ final double totalListSize = endOffset - startOffset;
+ return totalListSize / valueCount;
+ }
+
+ /**
+ * Get the current capacity which does not exceed either validity buffer or offset buffer.
+ * Note: Here the `getValueCapacity` has no relationship with the value buffer.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ return Math.min(offsetValueCapacity, getValidityBufferValueCapacity());
+ }
+
+ private int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ private int getOffsetBufferValueCapacity() {
+ return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH);
+ }
+
+ /**
+ * zero out the vector and the data in associated buffers.
+ */
+ public void zeroVector() {
+ initValidityBuffer();
+ initOffsetBuffer();
+ valueBuffer.setZero(0, valueBuffer.capacity());
+ }
+
+ /* zero out the validity buffer */
+ private void initValidityBuffer() {
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /* zero out the offset buffer */
+ private void initOffsetBuffer() {
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+ /**
+ * Reset the vector to initial state. Same as {@link #zeroVector()}.
+ * Note that this method doesn't release any memory.
+ */
+ public void reset() {
+ zeroVector();
+ lastSet = -1;
+ valueCount = 0;
+ }
+
+ /**
+ * Close the vector and release the associated buffers.
+ */
+ @Override
+ public void close() {
+ clear();
+ }
+
+ /**
+ * Same as {@link #close()}.
+ */
+ @Override
+ public void clear() {
+ validityBuffer = releaseBuffer(validityBuffer);
+ valueBuffer = releaseBuffer(valueBuffer);
+ offsetBuffer = releaseBuffer(offsetBuffer);
+ lastSet = -1;
+ valueCount = 0;
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ /**
+ * Initialize the children in schema for this Field. This operation is a
+ * NO-OP for scalar types since they don't have any children.
+ * @param children the schema
+ * @throws IllegalArgumentException if children is a non-empty list for scalar types.
+ */
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (!children.isEmpty()) {
+ throw new IllegalArgumentException("primitive type vector can not have children");
+ }
+ }
+
+ /**
+ * Get the inner child vectors.
+ * @return list of child vectors for complex types, empty list for scalar vector types
+ */
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return Collections.emptyList();
+ }
+
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf offBuffer = ownBuffers.get(1);
+ ArrowBuf dataBuffer = ownBuffers.get(2);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator);
+
+ lastSet = fieldNode.getLength() - 1;
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ * @return the inner buffers.
+ */
+ public List<ArrowBuf> getFieldBuffers() {
+ // before flight/IPC, we must bring the vector to a consistent state.
+ // this is because, it is possible that the offset buffers of some trailing values
+ // are not updated. this may cause some data in the data buffer being lost.
+ // for details, please see TestValueVector#testUnloadVariableWidthVector.
+ fillHoles(valueCount);
+
+ List<ArrowBuf> result = new ArrayList<>(3);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(offsetBuffer);
+ result.add(valueBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ offsetBuffer.readerIndex(0);
+ valueBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ offsetBuffer.writerIndex(0);
+ valueBuffer.writerIndex(0);
+ } else {
+ final int lastDataOffset = getStartOffset(valueCount);
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
+ valueBuffer.writerIndex(lastDataOffset);
+ }
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() {
+ allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector. See {@link #allocateNew(long, int)} for allocating memory for specific
+ * number of elements in the vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ @Override
+ public boolean allocateNewSafe() {
+ try {
+ allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+ return true;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+
+ /**
+ * Allocate memory for the vector to support storing at least the provided number of
+ * elements in the vector. This method must be called prior to using the ValueVector.
+ *
+ * @param totalBytes desired total memory capacity
+ * @param valueCount the desired number of elements in the vector
+ * @throws org.apache.arrow.memory.OutOfMemoryException if memory allocation fails
+ */
+ @Override
+ public void allocateNew(long totalBytes, int valueCount) {
+ assert totalBytes >= 0;
+
+ checkDataBufferSize(totalBytes);
+ computeAndCheckOffsetsBufferSize(valueCount);
+
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+
+ try {
+ allocateBytes(totalBytes, valueCount);
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ @Override
+ public void allocateNew(int valueCount) {
+ allocateNew(lastValueAllocationSizeInBytes, valueCount);
+ }
+
+ /* Check if the data buffer size is within bounds. */
+ private void checkDataBufferSize(long size) {
+ if (size > MAX_ALLOCATION_SIZE || size < 0) {
+ throw new OversizedAllocationException("Memory required for vector " +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ }
+
+ /*
+ * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's
+ * within bounds.
+ */
+ private long computeAndCheckOffsetsBufferSize(int valueCount) {
+ /* to track the end offset of last data element in vector, we need
+ * an additional slot in offset buffer.
+ */
+ final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH);
+ if (size > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Memory required for vector capacity " +
+ valueCount +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ return size;
+ }
+
+ /* allocate the inner buffers */
+ private void allocateBytes(final long valueBufferSize, final int valueCount) {
+ /* allocate data buffer */
+ long curSize = valueBufferSize;
+ valueBuffer = allocator.buffer(curSize);
+ valueBuffer.readerIndex(0);
+
+ /* allocate offset buffer and validity buffer */
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH);
+ offsetBuffer = buffers.getDataBuf();
+ validityBuffer = buffers.getValidityBuf();
+ initOffsetBuffer();
+ initValidityBuffer();
+
+ lastValueCapacity = getValueCapacity();
+ lastValueAllocationSizeInBytes = capAtMaxInt(valueBuffer.capacity());
+ }
+
+ /* allocate offset buffer */
+ private void allocateOffsetBuffer(final long size) {
+ final int curSize = (int) size;
+ offsetBuffer = allocator.buffer(curSize);
+ offsetBuffer.readerIndex(0);
+ initOffsetBuffer();
+ }
+
+ /* allocate validity buffer */
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ initValidityBuffer();
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ public void reAlloc() {
+ reallocDataBuffer();
+ reallocValidityAndOffsetBuffers();
+ }
+
+ /**
+ * Reallocate the data buffer. Data Buffer stores the actual data for
+ * VARCHAR or VARBINARY elements in the vector. The behavior is to double
+ * the size of buffer.
+ * @throws OversizedAllocationException if the desired new size is more than
+ * max allowed
+ * @throws OutOfMemoryException if the internal memory allocation fails
+ */
+ public void reallocDataBuffer() {
+ final long currentBufferCapacity = valueBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (lastValueAllocationSizeInBytes > 0) {
+ newAllocationSize = lastValueAllocationSizeInBytes;
+ } else {
+ newAllocationSize = INITIAL_BYTE_COUNT * 2L;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ checkDataBufferSize(newAllocationSize);
+
+ final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+ newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = newBuf;
+ lastValueAllocationSizeInBytes = valueBuffer.capacity();
+ }
+
+ /**
+ * Reallocate the validity and offset buffers for this vector. Validity
+ * buffer is used to track the NULL or NON-NULL nature of elements in
+ * the vector and offset buffer is used to store the lengths of variable
+ * width elements in the vector.
+ *
+ * <p>Note that data buffer for variable length vectors moves independent
+ * of the companion validity and offset buffers. This is in
+ * contrast to what we have for fixed width vectors.
+ *
+ * <p>So even though we may have setup an initial capacity of 1024
+ * elements in the vector, it is quite possible
+ * that we need to reAlloc() the data buffer when we are setting
+ * the 5th element in the vector simply because previous
+ * variable length elements have exhausted the buffer capacity.
+ * However, we really don't need to reAlloc() validity and
+ * offset buffers until we try to set the 1025th element
+ * This is why we do a separate check for safe methods to
+ * determine which buffer needs reallocation.
+ * @throws OversizedAllocationException if the desired new size is more than
+ * max allowed
+ * @throws OutOfMemoryException if the internal memory allocation fails
+ */
+ public void reallocValidityAndOffsetBuffers() {
+ int targetOffsetCount = capAtMaxInt((offsetBuffer.capacity() / OFFSET_WIDTH) * 2);
+ if (targetOffsetCount == 0) {
+ if (lastValueCapacity > 0) {
+ targetOffsetCount = (lastValueCapacity + 1);
+ } else {
+ targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1);
+ }
+ }
+ computeAndCheckOffsetsBufferSize(targetOffsetCount);
+
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH);
+ final ArrowBuf newOffsetBuffer = buffers.getDataBuf();
+ newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity());
+ newOffsetBuffer.setZero(offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity());
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = newOffsetBuffer;
+
+ final ArrowBuf newValidityBuffer = buffers.getValidityBuf();
+ newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity());
+ newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity());
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = newValidityBuffer;
+
+ lastValueCapacity = getValueCapacity();
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying data buffer.
+ * @return number of bytes in the data buffer
+ */
+ @Override
+ public int getByteCapacity() {
+ return capAtMaxInt(valueBuffer.capacity());
+ }
+
+ @Override
+ public int sizeOfValueBuffer() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH);
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ return getBufferSizeFor(this.valueCount);
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ * @param valueCount desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH;
+ /* get the end offset for this valueCount */
+ final int dataBufferSize = offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH);
+ return validityBufferSize + offsetBufferSize + dataBufferSize;
+ }
+
+ /**
+ * Get information about how this field is materialized.
+ * @return the field corresponding to this vector
+ */
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final ArrowBuf[] buffers;
+ setReaderAndWriterIndex();
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ buffers = new ArrowBuf[3];
+ buffers[0] = validityBuffer;
+ buffers[1] = offsetBuffer;
+ buffers[2] = valueBuffer;
+ }
+ if (clear) {
+ for (final ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @param callBack not used
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return getTransferPair(ref, allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(getName(), allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator);
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ * @param target destination vector for transfer
+ */
+ public void transferTo(BaseVariableWidthVector target) {
+ compareTypes(target, "transferTo");
+ target.clear();
+ target.validityBuffer = transferBuffer(validityBuffer, target.allocator);
+ target.valueBuffer = transferBuffer(valueBuffer, target.allocator);
+ target.offsetBuffer = transferBuffer(offsetBuffer, target.allocator);
+ target.setLastSet(this.lastSet);
+ if (this.valueCount > 0) {
+ target.setValueCount(this.valueCount);
+ }
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ * @param target destination vector
+ */
+ public void splitAndTransferTo(int startIndex, int length,
+ BaseVariableWidthVector target) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ compareTypes(target, "splitAndTransferTo");
+ target.clear();
+ splitAndTransferValidityBuffer(startIndex, length, target);
+ splitAndTransferOffsetBuffer(startIndex, length, target);
+ target.setLastSet(length - 1);
+ if (length > 0) {
+ target.setValueCount(length);
+ }
+ }
+
+ /**
+ * Transfer the offsets along with data. Unlike the data buffer, we cannot simply
+ * slice the offset buffer for split and transfer. The reason is that offsets
+ * in the target vector have to be adjusted and made relative to the staring
+ * offset in source vector from the start index of split. This is why, we
+ * need to explicitly allocate the offset buffer and set the adjusted offsets
+ * in the target vector.
+ */
+ private void splitAndTransferOffsetBuffer(int startIndex, int length, BaseVariableWidthVector target) {
+ final int start = offsetBuffer.getInt((long) startIndex * OFFSET_WIDTH);
+ final int end = offsetBuffer.getInt((long) (startIndex + length) * OFFSET_WIDTH);
+ final int dataLength = end - start;
+
+ if (start == 0) {
+ final ArrowBuf slicedOffsetBuffer = offsetBuffer.slice(startIndex * OFFSET_WIDTH, (1 + length) * OFFSET_WIDTH);
+ target.offsetBuffer = transferBuffer(slicedOffsetBuffer, target.allocator);
+ } else {
+ target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH);
+ for (int i = 0; i < length + 1; i++) {
+ final int relativeSourceOffset = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH) - start;
+ target.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeSourceOffset);
+ }
+ }
+ final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength);
+ target.valueBuffer = transferBuffer(slicedBuffer, target.allocator);
+ }
+
+ /*
+ * Transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length,
+ BaseVariableWidthVector target) {
+ if (length <= 0) {
+ return;
+ }
+
+ final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ final int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ final int offset = startIndex % 8;
+
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator);
+ return;
+ }
+
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | common getters and setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Check if the given index is within the current value capacity
+ * of the vector.
+ *
+ * @param index position to check
+ * @return true if index is within the current value capacity
+ */
+ public boolean isSafe(int index) {
+ return index < getValueCapacity();
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null
+ */
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the value count of vector. This will always be zero unless
+ * setValueCount(int) has been called prior to calling this.
+ *
+ * @return valueCount for the vector
+ */
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * Sets the value count for the vector.
+ *
+ * @param valueCount value count
+ */
+ public void setValueCount(int valueCount) {
+ assert valueCount >= 0;
+ this.valueCount = valueCount;
+ while (valueCount > getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ fillHoles(valueCount);
+ lastSet = valueCount - 1;
+ setReaderAndWriterIndex();
+ }
+
+ /**
+ * Create holes in the vector upto the given index (exclusive).
+ * Holes will be created from the current last set position in
+ * the vector.
+ *
+ * @param index target index
+ */
+ public void fillEmpties(int index) {
+ handleSafe(index, emptyByteArray.length);
+ fillHoles(index);
+ lastSet = index - 1;
+ }
+
+ /**
+ * Set the index of last non-null element in the vector.
+ * It is important to call this method with appropriate value
+ * before calling {@link #setValueCount(int)}.
+ *
+ * @param value desired index of last non-null element.
+ */
+ public void setLastSet(int value) {
+ lastSet = value;
+ }
+
+ /**
+ * Get the index of last non-null element in the vector.
+ *
+ * @return index of the last non-null element
+ */
+ public int getLastSet() {
+ return lastSet;
+ }
+
+ /**
+ * Get the starting position (offset) in the data stream for a given
+ * element in the vector.
+ *
+ * @param index position of the element in the vector
+ * @return starting offset for the element
+ */
+ public long getStartEnd(int index) {
+ return offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ }
+
+ /**
+ * Mark the particular position in the vector as non-null.
+ *
+ * @param index position of the element.
+ */
+ @Override
+ public void setIndexDefined(int index) {
+ // We need to check and realloc both validity and offset buffer
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ /**
+ * Sets the value length for an element.
+ *
+ * @param index position of the element to set
+ * @param length length of the element
+ */
+ public void setValueLengthSafe(int index, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length);
+ lastSet = index;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return greater than 0 length for non-null element, 0 otherwise
+ */
+ public int getValueLength(int index) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ return 0;
+ }
+ final int startOffset = getStartOffset(index);
+ final int dataLength =
+ offsetBuffer.getInt((index + 1) * OFFSET_WIDTH) - startOffset;
+ return dataLength;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the supplied
+ * byte array. This is same as using {@link #set(int, byte[], int, int)}
+ * with start as 0 and length as value.length
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ */
+ public void set(int index, byte[] value) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, 0, value.length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, byte[])} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ */
+ public void setSafe(int index, byte[] value) {
+ assert index >= 0;
+ handleSafe(index, value.length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, 0, value.length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the supplied
+ * byte array.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ * @param start start index in array of bytes
+ * @param length length of data in array of bytes
+ */
+ public void set(int index, byte[] value, int start, int length) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, byte[], int, int)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ * @param start start index in array of bytes
+ * @param length length of data in array of bytes
+ */
+ public void setSafe(int index, byte[] value, int start, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the
+ * content in supplied ByteBuffer.
+ *
+ * @param index position of the element to set
+ * @param value ByteBuffer with data
+ * @param start start index in ByteBuffer
+ * @param length length of data in ByteBuffer
+ */
+ public void set(int index, ByteBuffer value, int start, int length) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length);
+ valueBuffer.setBytes(startOffset, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value ByteBuffer with data
+ * @param start start index in ByteBuffer
+ * @param length length of data in ByteBuffer
+ */
+ public void setSafe(int index, ByteBuffer value, int start, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length);
+ valueBuffer.setBytes(startOffset, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the element at the given index to null.
+ *
+ * @param index position of element
+ */
+ public void setNull(int index) {
+ // We need to check and realloc both validity and offset buffer
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of data in buffer
+ * @param end end position of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void set(int index, int isSet, int start, int end, ArrowBuf buffer) {
+ assert index >= 0;
+ final int dataLength = end - start;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+ final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, buffer, start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of data in buffer
+ * @param end end position of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) {
+ assert index >= 0;
+ final int dataLength = end - start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+ final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, buffer, start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param start start position of data in buffer
+ * @param length length of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void set(int index, int start, int length, ArrowBuf buffer) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ final ArrowBuf bb = buffer.slice(start, length);
+ valueBuffer.setBytes(startOffset, bb);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param start start position of data in buffer
+ * @param length length of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void setSafe(int index, int start, int length, ArrowBuf buffer) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ final ArrowBuf bb = buffer.slice(start, length);
+ valueBuffer.setBytes(startOffset, bb);
+ lastSet = index;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | helper methods for setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ protected final void fillHoles(int index) {
+ for (int i = lastSet + 1; i < index; i++) {
+ setBytes(i, emptyByteArray, 0, emptyByteArray.length);
+ }
+ lastSet = index - 1;
+ }
+
+ protected final void setBytes(int index, byte[] value, int start, int length) {
+ /* end offset of current last element in the vector. this will
+ * be the start offset of new element we are trying to store.
+ */
+ final int startOffset = getStartOffset(index);
+ /* set new end offset */
+ offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ /* store the var length data in value buffer */
+ valueBuffer.setBytes(startOffset, value, start, length);
+ }
+
+ public final int getStartOffset(int index) {
+ return offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ }
+
+ protected final void handleSafe(int index, int dataLength) {
+ /*
+ * IMPORTANT:
+ * value buffer for variable length vectors moves independent
+ * of the companion validity and offset buffers. This is in
+ * contrast to what we have for fixed width vectors.
+ *
+ * Here there is no concept of getValueCapacity() in the
+ * data stream. getValueCapacity() is applicable only to validity
+ * and offset buffers.
+ *
+ * So even though we may have setup an initial capacity of 1024
+ * elements in the vector, it is quite possible
+ * that we need to reAlloc() the data buffer when we are setting
+ * the 5th element in the vector simply because previous
+ * variable length elements have exhausted the buffer capacity.
+ * However, we really don't need to reAlloc() validity and
+ * offset buffers until we try to set the 1025th element
+ * This is why we do a separate check for safe methods to
+ * determine which buffer needs reallocation.
+ */
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ final int startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1);
+ while (valueBuffer.capacity() < (startOffset + dataLength)) {
+ reallocDataBuffer();
+ }
+ }
+
+ /**
+ * Method used by Json Writer to read a variable width element from
+ * the variable width vector and write to Json.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param data buffer storing the variable width vector elements
+ * @param offset buffer storing the offsets of variable width vector elements
+ * @param index position of the element in the vector
+ * @return array of bytes
+ */
+ public static byte[] get(final ArrowBuf data, final ArrowBuf offset, int index) {
+ final int currentStartOffset = offset.getInt((long) index * OFFSET_WIDTH);
+ final int dataLength =
+ offset.getInt((long) (index + 1) * OFFSET_WIDTH) - currentStartOffset;
+ final byte[] result = new byte[dataLength];
+ data.getBytes(currentStartOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Method used by Json Reader to explicitly set the offsets of the variable
+ * width vector data. The method takes care of allocating the memory for
+ * offsets if the caller hasn't done so.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer ArrowBuf to store offsets for variable width elements
+ * @param allocator memory allocator
+ * @param valueCount number of elements
+ * @param index position of the element
+ * @param value offset of the element
+ * @return buffer holding the offsets
+ */
+ public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator,
+ int valueCount, int index, int value) {
+ if (buffer == null) {
+ buffer = allocator.buffer((long) valueCount * OFFSET_WIDTH);
+ }
+ buffer.setInt((long) index * OFFSET_WIDTH, value);
+ if (index == (valueCount - 1)) {
+ buffer.writerIndex((long) valueCount * OFFSET_WIDTH);
+ }
+
+ return buffer;
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ fillHoles(thisIndex);
+ BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
+ final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
+ } else {
+ final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH);
+ final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH);
+ final int length = end - start;
+ fillHoles(thisIndex);
+ BitVectorHelper.setBit(this.validityBuffer, thisIndex);
+ final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH);
+ from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length);
+ offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length);
+ }
+ lastSet = thisIndex;
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ handleSafe(thisIndex, 0);
+ fillHoles(thisIndex);
+ BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
+ final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
+ } else {
+ final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH);
+ final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH);
+ final int length = end - start;
+ handleSafe(thisIndex, length);
+ fillHoles(thisIndex);
+ BitVectorHelper.setBit(this.validityBuffer, thisIndex);
+ final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH);
+ from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length);
+ offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length);
+ }
+ lastSet = thisIndex;
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index) {
+ return getDataPointer(index, new ArrowBufPointer());
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
+ if (isNull(index)) {
+ reuse.set(null, 0, 0);
+ } else {
+ int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ int length = offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - offset;
+ reuse.set(valueBuffer, offset, length);
+ }
+ return reuse;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isNull(index)) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ final int start = getStartOffset(index);
+ final int end = getStartOffset(index + 1);
+ return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ /**
+ * Gets the ending offset of a record, given its index.
+ */
+ public final int getEndOffset(int index) {
+ return offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java
new file mode 100644
index 000000000..c19955b54
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.BigIntReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.BigIntHolder;
+import org.apache.arrow.vector.holders.NullableBigIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BigIntVector implements a fixed width vector (8 bytes) of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class BigIntVector extends BaseFixedWidthVector implements BaseIntVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a BigIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public BigIntVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.BIGINT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a BigIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public BigIntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a BigIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public BigIntVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new BigIntReaderImpl(BigIntVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.BIGINT;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableBigIntHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableBigIntHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, BigIntHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableBigIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableBigIntHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, BigIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, BigIntHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((BigIntVector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ private class TransferImpl implements TransferPair {
+ BigIntVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new BigIntVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(BigIntVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public BigIntVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, BigIntVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
new file mode 100644
index 000000000..3bcfd983e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
@@ -0,0 +1,599 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.complex.impl.BitReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.BitHolder;
+import org.apache.arrow.vector.holders.NullableBitHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BitVector implements a fixed width (1 bit) vector of
+ * boolean values which could be null. Each value in the vector corresponds
+ * to a single bit in the underlying data stream backing the vector.
+ */
+public final class BitVector extends BaseFixedWidthVector {
+
+ private static final int HASH_CODE_FOR_ZERO = 17;
+
+ private static final int HASH_CODE_FOR_ONE = 19;
+
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a BitVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public BitVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.BIT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a BitVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public BitVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a BitVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field the Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public BitVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, 0);
+ reader = new BitReaderImpl(BitVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.BIT;
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ *
+ * @param valueCount desired number of elements in the vector
+ */
+ @Override
+ public void setInitialCapacity(int valueCount) {
+ final int size = getValidityBufferSizeFromCount(valueCount);
+ if (size * 2 > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Requested amount of memory is more than max allowed");
+ }
+ lastValueCapacity = valueCount;
+ }
+
+ @Override
+ protected int getValueBufferValueCapacity() {
+ return capAtMaxInt(valueBuffer.capacity() * 8);
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ *
+ * @param count desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int count) {
+ if (count == 0) {
+ return 0;
+ }
+ return 2 * getValidityBufferSizeFromCount(count);
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this vector.
+ *
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ return getBufferSizeFor(valueCount);
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ *
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ * @param target destination vector
+ */
+ public void splitAndTransferTo(int startIndex, int length, BaseFixedWidthVector target) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ compareTypes(target, "splitAndTransferTo");
+ target.clear();
+ target.validityBuffer = splitAndTransferBuffer(startIndex, length, target,
+ validityBuffer, target.validityBuffer);
+ target.valueBuffer = splitAndTransferBuffer(startIndex, length, target,
+ valueBuffer, target.valueBuffer);
+ target.refreshValueCapacity();
+
+ target.setValueCount(length);
+ }
+
+ private ArrowBuf splitAndTransferBuffer(
+ int startIndex,
+ int length,
+ BaseFixedWidthVector target,
+ ArrowBuf sourceBuffer,
+ ArrowBuf destBuffer) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ /* slice */
+ if (destBuffer != null) {
+ destBuffer.getReferenceManager().release();
+ }
+ destBuffer = sourceBuffer.slice(firstByteSource, byteSizeTarget);
+ destBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ destBuffer = allocator.buffer(byteSizeTarget);
+ destBuffer.readerIndex(0);
+ destBuffer.setZero(0, destBuffer.capacity());
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(sourceBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(sourceBuffer, firstByteSource + i + 1, offset);
+
+ destBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(sourceBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(sourceBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ destBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(sourceBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ destBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+
+ return destBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ private int getBit(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = valueBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return getBit(index);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableBitHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = getBit(index);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Boolean getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return new Boolean(getBit(index) != 0);
+ }
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ boolean fromIsSet = BitVectorHelper.get(from.getValidityBuffer(), fromIndex) != 0;
+ if (fromIsSet) {
+ BitVectorHelper.setBit(validityBuffer, thisIndex);
+ BitVectorHelper.setValidityBit(valueBuffer, thisIndex, ((BitVector) from).getBit(fromIndex));
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, thisIndex);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ if (value != 0) {
+ BitVectorHelper.setBit(valueBuffer, index);
+ } else {
+ BitVectorHelper.unsetBit(valueBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableBitHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ if (holder.value != 0) {
+ BitVectorHelper.setBit(valueBuffer, index);
+ } else {
+ BitVectorHelper.unsetBit(valueBuffer, index);
+ }
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, BitHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ if (holder.value != 0) {
+ BitVectorHelper.setBit(valueBuffer, index);
+ } else {
+ BitVectorHelper.unsetBit(valueBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableBitHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableBitHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, BitHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, BitHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Set the element at the given index to one.
+ *
+ * @param index position of element
+ */
+ public void setToOne(int index) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ BitVectorHelper.setBit(valueBuffer, index);
+ }
+
+ /**
+ * Same as {@link #setToOne(int)} except that it handles the case when
+ * index is greater than or equal to current value capacity of the vector.
+ *
+ * @param index position of the element
+ */
+ public void setSafeToOne(int index) {
+ handleSafe(index);
+ setToOne(index);
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int hashCode(int index) {
+ if (isNull(index)) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ } else {
+ if (get(index) == 0) {
+ return HASH_CODE_FOR_ZERO;
+ } else {
+ return HASH_CODE_FOR_ONE;
+ }
+ }
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return hashCode(index);
+ }
+
+ /**
+ * Set count bits to 1 in data starting at firstBitIndex.
+ *
+ * @param firstBitIndex the index of the first bit to set
+ * @param count the number of bits to set
+ */
+ public void setRangeToOne(int firstBitIndex, int count) {
+ int startByteIndex = BitVectorHelper.byteIndex(firstBitIndex);
+ final int lastBitIndex = firstBitIndex + count;
+ final int endByteIndex = BitVectorHelper.byteIndex(lastBitIndex);
+ final int startByteBitIndex = BitVectorHelper.bitIndex(firstBitIndex);
+ final int endBytebitIndex = BitVectorHelper.bitIndex(lastBitIndex);
+ if (count < 8 && startByteIndex == endByteIndex) {
+ // handles the case where we don't have a first and a last byte
+ byte bitMask = 0;
+ for (int i = startByteBitIndex; i < endBytebitIndex; ++i) {
+ bitMask |= (byte) (1L << i);
+ }
+ BitVectorHelper.setBitMaskedByte(validityBuffer, startByteIndex, bitMask);
+ BitVectorHelper.setBitMaskedByte(valueBuffer, startByteIndex, bitMask);
+ } else {
+ // fill in first byte (if it's not full)
+ if (startByteBitIndex != 0) {
+ final byte bitMask = (byte) (0xFFL << startByteBitIndex);
+ BitVectorHelper.setBitMaskedByte(validityBuffer, startByteIndex, bitMask);
+ BitVectorHelper.setBitMaskedByte(valueBuffer, startByteIndex, bitMask);
+ ++startByteIndex;
+ }
+
+ // fill in one full byte at a time
+ validityBuffer.setOne(startByteIndex, endByteIndex - startByteIndex);
+ valueBuffer.setOne(startByteIndex, endByteIndex - startByteIndex);
+
+ // fill in the last byte (if it's not full)
+ if (endBytebitIndex != 0) {
+ final int byteIndex = BitVectorHelper.byteIndex(lastBitIndex - endBytebitIndex);
+ final byte bitMask = (byte) (0xFFL >>> ((8 - endBytebitIndex) & 7));
+ BitVectorHelper.setBitMaskedByte(validityBuffer, byteIndex, bitMask);
+ BitVectorHelper.setBitMaskedByte(valueBuffer, byteIndex, bitMask);
+ }
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((BitVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ BitVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new BitVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(BitVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public BitVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, BitVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
new file mode 100644
index 000000000..3745c5a75
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
@@ -0,0 +1,449 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static io.netty.util.internal.PlatformDependent.getByte;
+import static io.netty.util.internal.PlatformDependent.getInt;
+import static io.netty.util.internal.PlatformDependent.getLong;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BoundsChecking;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Helper class for performing generic operations on a bit vector buffer.
+ * External use of this class is not recommended.
+ */
+public class BitVectorHelper {
+
+ private BitVectorHelper() {}
+
+ /**
+ * Get the index of byte corresponding to bit index in validity buffer.
+ */
+ public static long byteIndex(long absoluteBitIndex) {
+ return absoluteBitIndex >> 3;
+ }
+
+ /**
+ * Get the relative index of bit within the byte in validity buffer.
+ */
+ public static int bitIndex(long absoluteBitIndex) {
+ return checkedCastToInt(absoluteBitIndex & 7);
+ }
+
+ /**
+ * Get the index of byte corresponding to bit index in validity buffer.
+ */
+ public static int byteIndex(int absoluteBitIndex) {
+ return absoluteBitIndex >> 3;
+ }
+
+ /**
+ * Get the relative index of bit within the byte in validity buffer.
+ */
+ public static int bitIndex(int absoluteBitIndex) {
+ return absoluteBitIndex & 7;
+ }
+
+ /**
+ * Set the bit at provided index to 1.
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param index index to be set
+ */
+ public static void setBit(ArrowBuf validityBuffer, long index) {
+ // it can be observed that some logic is duplicate of the logic in setValidityBit.
+ // this is because JIT cannot always remove the if branch in setValidityBit,
+ // so we give a dedicated implementation for setting bits.
+ final long byteIndex = byteIndex(index);
+ final int bitIndex = bitIndex(index);
+
+ // the byte is promoted to an int, because according to Java specification,
+ // bytes will be promoted to ints automatically, upon expression evaluation.
+ // by promoting it manually, we avoid the unnecessary conversions.
+ int currentByte = validityBuffer.getByte(byteIndex);
+ final int bitMask = 1 << bitIndex;
+ currentByte |= bitMask;
+ validityBuffer.setByte(byteIndex, currentByte);
+ }
+
+ /**
+ * Set the bit at provided index to 0.
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param index index to be set
+ */
+ public static void unsetBit(ArrowBuf validityBuffer, int index) {
+ // it can be observed that some logic is duplicate of the logic in setValidityBit.
+ // this is because JIT cannot always remove the if branch in setValidityBit,
+ // so we give a dedicated implementation for unsetting bits.
+ final int byteIndex = byteIndex(index);
+ final int bitIndex = bitIndex(index);
+
+ // the byte is promoted to an int, because according to Java specification,
+ // bytes will be promoted to ints automatically, upon expression evaluation.
+ // by promoting it manually, we avoid the unnecessary conversions.
+ int currentByte = validityBuffer.getByte(byteIndex);
+ final int bitMask = 1 << bitIndex;
+ currentByte &= ~bitMask;
+ validityBuffer.setByte(byteIndex, currentByte);
+ }
+
+ /**
+ * Set the bit at a given index to provided value (1 or 0).
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param index index to be set
+ * @param value value to set
+ */
+ public static void setValidityBit(ArrowBuf validityBuffer, int index, int value) {
+ final int byteIndex = byteIndex(index);
+ final int bitIndex = bitIndex(index);
+
+ // the byte is promoted to an int, because according to Java specification,
+ // bytes will be promoted to ints automatically, upon expression evaluation.
+ // by promoting it manually, we avoid the unnecessary conversions.
+ int currentByte = validityBuffer.getByte(byteIndex);
+ final int bitMask = 1 << bitIndex;
+ if (value != 0) {
+ currentByte |= bitMask;
+ } else {
+ currentByte &= ~bitMask;
+ }
+ validityBuffer.setByte(byteIndex, currentByte);
+ }
+
+ /**
+ * Set the bit at a given index to provided value (1 or 0). Internally
+ * takes care of allocating the buffer if the caller didn't do so.
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param allocator allocator for the buffer
+ * @param valueCount number of values to allocate/set
+ * @param index index to be set
+ * @param value value to set
+ * @return ArrowBuf
+ */
+ public static ArrowBuf setValidityBit(ArrowBuf validityBuffer, BufferAllocator allocator,
+ int valueCount, int index, int value) {
+ if (validityBuffer == null) {
+ validityBuffer = allocator.buffer(getValidityBufferSize(valueCount));
+ }
+ setValidityBit(validityBuffer, index, value);
+ if (index == (valueCount - 1)) {
+ validityBuffer.writerIndex(getValidityBufferSize(valueCount));
+ }
+
+ return validityBuffer;
+ }
+
+ /**
+ * Check if a bit at a given index is set or not.
+ *
+ * @param buffer buffer to check
+ * @param index index of the buffer
+ * @return 1 if bit is set, 0 otherwise.
+ */
+ public static int get(final ArrowBuf buffer, int index) {
+ final int byteIndex = index >> 3;
+ final byte b = buffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Compute the size of validity buffer required to manage a given number
+ * of elements in a vector.
+ *
+ * @param valueCount number of elements in the vector
+ * @return buffer size
+ */
+ public static int getValidityBufferSize(int valueCount) {
+ return DataSizeRoundingUtil.divideBy8Ceil(valueCount);
+ }
+
+ /**
+ * Given a validity buffer, find the number of bits that are not set.
+ * This is used to compute the number of null elements in a nullable vector.
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param valueCount number of values in the vector
+ * @return number of bits not set.
+ */
+ public static int getNullCount(final ArrowBuf validityBuffer, final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ int count = 0;
+ final int sizeInBytes = getValidityBufferSize(valueCount);
+ // If value count is not a multiple of 8, then calculate number of used bits in the last byte
+ final int remainder = valueCount % 8;
+ final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;
+
+ int index = 0;
+ while (index + 8 <= fullBytesCount) {
+ long longValue = validityBuffer.getLong(index);
+ count += Long.bitCount(longValue);
+ index += 8;
+ }
+
+ if (index + 4 <= fullBytesCount) {
+ int intValue = validityBuffer.getInt(index);
+ count += Integer.bitCount(intValue);
+ index += 4;
+ }
+
+ while (index < fullBytesCount) {
+ byte byteValue = validityBuffer.getByte(index);
+ count += Integer.bitCount(byteValue & 0xFF);
+ index += 1;
+ }
+
+ // handling with the last bits
+ if (remainder != 0) {
+ byte byteValue = validityBuffer.getByte(sizeInBytes - 1);
+
+ // making the remaining bits all 1s if it is not fully filled
+ byte mask = (byte) (0xFF << remainder);
+ byteValue = (byte) (byteValue | mask);
+ count += Integer.bitCount(byteValue & 0xFF);
+ }
+
+ return 8 * sizeInBytes - count;
+ }
+
+ /**
+ * Tests if all bits in a validity buffer are equal 0 or 1, according to the specified parameter.
+ * @param validityBuffer the validity buffer.
+ * @param valueCount the bit count.
+ * @param checkOneBits if set to true, the method checks if all bits are equal to 1;
+ * otherwise, it checks if all bits are equal to 0.
+ * @return true if all bits are 0 or 1 according to the parameter, and false otherwise.
+ */
+ public static boolean checkAllBitsEqualTo(
+ final ArrowBuf validityBuffer, final int valueCount, final boolean checkOneBits) {
+ if (valueCount == 0) {
+ return true;
+ }
+ final int sizeInBytes = getValidityBufferSize(valueCount);
+
+ // boundary check
+ validityBuffer.checkBytes(0, sizeInBytes);
+
+ // If value count is not a multiple of 8, then calculate number of used bits in the last byte
+ final int remainder = valueCount % 8;
+ final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;
+
+ // the integer number to compare against
+ final int intToCompare = checkOneBits ? -1 : 0;
+
+ int index = 0;
+ while (index + 8 <= fullBytesCount) {
+ long longValue = getLong(validityBuffer.memoryAddress() + index);
+ if (longValue != (long) intToCompare) {
+ return false;
+ }
+ index += 8;
+ }
+
+ if (index + 4 <= fullBytesCount) {
+ int intValue = getInt(validityBuffer.memoryAddress() + index);
+ if (intValue != intToCompare) {
+ return false;
+ }
+ index += 4;
+ }
+
+ while (index < fullBytesCount) {
+ byte byteValue = getByte(validityBuffer.memoryAddress() + index);
+ if (byteValue != (byte) intToCompare) {
+ return false;
+ }
+ index += 1;
+ }
+
+ // handling with the last bits
+ if (remainder != 0) {
+ byte byteValue = getByte(validityBuffer.memoryAddress() + sizeInBytes - 1);
+ byte mask = (byte) ((1 << remainder) - 1);
+ byteValue = (byte) (byteValue & mask);
+ if (checkOneBits) {
+ if ((mask & byteValue) != mask) {
+ return false;
+ }
+ } else {
+ if (byteValue != (byte) 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ /** Returns the byte at index from data right-shifted by offset. */
+ public static byte getBitsFromCurrentByte(final ArrowBuf data, final int index, final int offset) {
+ return (byte) ((data.getByte(index) & 0xFF) >>> offset);
+ }
+
+ /**
+ * Returns the byte at <code>index</code> from left-shifted by (8 - <code>offset</code>).
+ */
+ public static byte getBitsFromNextByte(ArrowBuf data, int index, int offset) {
+ return (byte) ((data.getByte(index) << (8 - offset)));
+ }
+
+ /**
+ * Returns a new buffer if the source validity buffer is either all null or all
+ * not-null, otherwise returns a buffer pointing to the same memory as source.
+ *
+ * @param fieldNode The fieldNode containing the null count
+ * @param sourceValidityBuffer The source validity buffer that will have its
+ * position copied if there is a mix of null and non-null values
+ * @param allocator The allocator to use for creating a new buffer if necessary.
+ * @return A new buffer that is either allocated or points to the same memory as sourceValidityBuffer.
+ */
+ public static ArrowBuf loadValidityBuffer(final ArrowFieldNode fieldNode,
+ final ArrowBuf sourceValidityBuffer,
+ final BufferAllocator allocator) {
+ final int valueCount = fieldNode.getLength();
+ ArrowBuf newBuffer = null;
+ /* either all NULLs or all non-NULLs */
+ if (fieldNode.getNullCount() == 0 || fieldNode.getNullCount() == valueCount) {
+ newBuffer = allocator.buffer(getValidityBufferSize(valueCount));
+ newBuffer.setZero(0, newBuffer.capacity());
+ if (fieldNode.getNullCount() != 0) {
+ /* all NULLs */
+ return newBuffer;
+ }
+ /* all non-NULLs */
+ int fullBytesCount = valueCount / 8;
+ newBuffer.setOne(0, fullBytesCount);
+ int remainder = valueCount % 8;
+ if (remainder > 0) {
+ byte bitMask = (byte) (0xFFL >>> ((8 - remainder) & 7));
+ newBuffer.setByte(fullBytesCount, bitMask);
+ }
+ } else {
+ /* mixed byte pattern -- create another ArrowBuf associated with the
+ * target allocator
+ */
+ newBuffer = sourceValidityBuffer.getReferenceManager().retain(sourceValidityBuffer, allocator);
+ }
+
+ return newBuffer;
+ }
+
+ /**
+ * Set the byte of the given index in the data buffer by applying a bit mask to
+ * the current byte at that index.
+ *
+ * @param data buffer to set
+ * @param byteIndex byteIndex within the buffer
+ * @param bitMask bit mask to be set
+ */
+ static void setBitMaskedByte(ArrowBuf data, int byteIndex, byte bitMask) {
+ byte currentByte = data.getByte(byteIndex);
+ currentByte |= bitMask;
+ data.setByte(byteIndex, currentByte);
+ }
+
+ /**
+ * Concat two validity buffers.
+ * @param input1 the first validity buffer.
+ * @param numBits1 the number of bits in the first validity buffer.
+ * @param input2 the second validity buffer.
+ * @param numBits2 the number of bits in the second validity buffer.
+ * @param output the output validity buffer. It can be the same one as the first input.
+ * The caller must make sure the output buffer has enough capacity.
+ */
+ public static void concatBits(ArrowBuf input1, int numBits1, ArrowBuf input2, int numBits2, ArrowBuf output) {
+ int numBytes1 = DataSizeRoundingUtil.divideBy8Ceil(numBits1);
+ int numBytes2 = DataSizeRoundingUtil.divideBy8Ceil(numBits2);
+ int numBytesOut = DataSizeRoundingUtil.divideBy8Ceil(numBits1 + numBits2);
+
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ output.checkBytes(0, numBytesOut);
+ }
+
+ // copy the first bit set
+ if (input1 != output) {
+ PlatformDependent.copyMemory(input1.memoryAddress(), output.memoryAddress(), numBytes1);
+ }
+
+ if (bitIndex(numBits1) == 0) {
+ // The number of bits for the first bit set is a multiple of 8, so the boundary is at byte boundary.
+ // For this case, we have a shortcut to copy all bytes from the second set after the byte boundary.
+ PlatformDependent.copyMemory(input2.memoryAddress(), output.memoryAddress() + numBytes1, numBytes2);
+ return;
+ }
+
+ // the number of bits to fill a full byte after the first input is processed
+ int numBitsToFill = 8 - bitIndex(numBits1);
+
+ // mask to clear high bits
+ int mask = (1 << (8 - numBitsToFill)) - 1;
+
+ int numFullBytes = numBits2 / 8;
+
+ int prevByte = output.getByte(numBytes1 - 1) & mask;
+ for (int i = 0; i < numFullBytes; i++) {
+ int curByte = input2.getByte(i) & 0xff;
+
+ // first fill the bits to a full byte
+ int byteToFill = (curByte << (8 - numBitsToFill)) & 0xff;
+ output.setByte(numBytes1 + i - 1, byteToFill | prevByte);
+
+ // fill remaining bits in the current byte
+ // note that it is also the previous byte for the next iteration
+ prevByte = curByte >>> numBitsToFill;
+ }
+
+ int lastOutputByte = prevByte;
+
+ // the number of extra bits for the second input, relative to full bytes
+ int numTrailingBits = bitIndex(numBits2);
+
+ if (numTrailingBits == 0) {
+ output.setByte(numBytes1 + numFullBytes - 1, lastOutputByte);
+ return;
+ }
+
+ // process remaining bits from input2
+ int remByte = input2.getByte(numBytes2 - 1) & 0xff;
+
+ int byteToFill = remByte << (8 - numBitsToFill);
+ lastOutputByte |= byteToFill;
+
+ output.setByte(numBytes1 + numFullBytes - 1, lastOutputByte);
+
+ if (numTrailingBits > numBitsToFill) {
+ // clear all bits for the last byte before writing
+ output.setByte(numBytes1 + numFullBytes, 0);
+
+ // some remaining bits cannot be filled in the previous byte
+ int leftByte = remByte >>> numBitsToFill;
+ output.setByte(numBytes1 + numFullBytes, leftByte);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java
new file mode 100644
index 000000000..ccba5b26c
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+
+/**
+ * Content is backed by a buffer and can be loaded/unloaded.
+ */
+public interface BufferBacked {
+
+ void load(ArrowFieldNode fieldNode, ArrowBuf data);
+
+ ArrowBuf unLoad();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java
new file mode 100644
index 000000000..09c874e39
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Metadata class that captures the "type" of an Arrow buffer.
+ * (e.g. data buffers, offset buffers for variable width types and validity
+ * buffers).
+ */
+public class BufferLayout {
+
+ /**
+ * Enumeration of the different logical types a buffer can have.
+ */
+ public enum BufferType {
+ DATA("DATA"),
+ OFFSET("OFFSET"),
+ VALIDITY("VALIDITY"),
+ TYPE("TYPE_ID");
+
+ private final String name;
+
+ BufferType(String name) {
+ this.name = name;
+ }
+
+ public String getName() {
+ return name;
+ }
+ }
+
+ private static final BufferLayout VALIDITY_BUFFER = new BufferLayout(BufferType.VALIDITY, 1);
+ private static final BufferLayout OFFSET_BUFFER = new BufferLayout(BufferType.OFFSET, 32);
+ private static final BufferLayout LARGE_OFFSET_BUFFER = new BufferLayout(BufferType.OFFSET, 64);
+ private static final BufferLayout TYPE_BUFFER = new BufferLayout(BufferType.TYPE, 32);
+ private static final BufferLayout BIT_BUFFER = new BufferLayout(BufferType.DATA, 1);
+ private static final BufferLayout VALUES_256 = new BufferLayout(BufferType.DATA, 256);
+ private static final BufferLayout VALUES_128 = new BufferLayout(BufferType.DATA, 128);
+ private static final BufferLayout VALUES_64 = new BufferLayout(BufferType.DATA, 64);
+ private static final BufferLayout VALUES_32 = new BufferLayout(BufferType.DATA, 32);
+ private static final BufferLayout VALUES_16 = new BufferLayout(BufferType.DATA, 16);
+ private static final BufferLayout VALUES_8 = new BufferLayout(BufferType.DATA, 8);
+
+ public static BufferLayout typeBuffer() {
+ return TYPE_BUFFER;
+ }
+
+ public static BufferLayout offsetBuffer() {
+ return OFFSET_BUFFER;
+ }
+
+ public static BufferLayout largeOffsetBuffer() {
+ return LARGE_OFFSET_BUFFER;
+ }
+
+ /**
+ * Returns a databuffer for the given bitwidth. Only supports powers of two between 8 and 128
+ * inclusive.
+ */
+ public static BufferLayout dataBuffer(int typeBitWidth) {
+ switch (typeBitWidth) {
+ case 8:
+ return VALUES_8;
+ case 16:
+ return VALUES_16;
+ case 32:
+ return VALUES_32;
+ case 64:
+ return VALUES_64;
+ case 128:
+ return VALUES_128;
+ case 256:
+ return VALUES_256;
+ default:
+ throw new IllegalArgumentException("only 8, 16, 32, 64, 128, or 256 bits supported");
+ }
+ }
+
+ public static BufferLayout booleanVector() {
+ return BIT_BUFFER;
+ }
+
+ public static BufferLayout validityVector() {
+ return VALIDITY_BUFFER;
+ }
+
+ public static BufferLayout byteVector() {
+ return dataBuffer(8);
+ }
+
+ private final short typeBitWidth;
+
+ private final BufferType type;
+
+ BufferLayout(BufferType type, int typeBitWidth) {
+ super();
+ this.type = Preconditions.checkNotNull(type);
+ this.typeBitWidth = (short) typeBitWidth;
+ if (typeBitWidth <= 0) {
+ throw new IllegalArgumentException("bitWidth invalid: " + typeBitWidth);
+ }
+ }
+
+ public int getTypeBitWidth() {
+ return typeBitWidth;
+ }
+
+ public BufferType getType() {
+ return type;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("%s(%s)", type, typeBitWidth);
+ }
+
+ @Override
+ public int hashCode() {
+ return 31 * (31 + type.hashCode()) + typeBitWidth;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ BufferLayout other = (BufferLayout) obj;
+ return type.equals(other.type) && (typeBitWidth == other.typeBitWidth);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java
new file mode 100644
index 000000000..3e8826845
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.DateDayReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.DateDayHolder;
+import org.apache.arrow.vector.holders.NullableDateDayHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * DateDayVector implements a fixed width (4 bytes) vector of
+ * date values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class DateDayVector extends BaseFixedWidthVector {
+
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a DateDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public DateDayVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.DATEDAY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a DateDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DateDayVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a DateDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DateDayVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new DateDayReaderImpl(DateDayVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DATEDAY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDateDayHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Integer getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDateDayHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, DateDayHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDateDayHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDateDayHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, DateDayHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, DateDayHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int get(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((DateDayVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ DateDayVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new DateDayVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(DateDayVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public DateDayVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, DateDayVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java
new file mode 100644
index 000000000..73738d771
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.DateMilliReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.DateMilliHolder;
+import org.apache.arrow.vector.holders.NullableDateMilliHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * DateMilliVector implements a fixed width vector (8 bytes) of
+ * date values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class DateMilliVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a DateMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public DateMilliVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.DATEMILLI.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a DateMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DateMilliVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a DateMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DateMilliVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new DateMilliReaderImpl(DateMilliVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DATEMILLI;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDateMilliHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long millis = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDateMilliHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, DateMilliHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDateMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDateMilliHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, DateMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, DateMilliHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((DateMilliVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ DateMilliVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new DateMilliVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(DateMilliVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public DateMilliVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, DateMilliVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java
new file mode 100644
index 000000000..c5fef82d0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java
@@ -0,0 +1,584 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.math.BigDecimal;
+import java.nio.ByteOrder;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.Decimal256ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.NullableDecimal256Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Decimal256Vector implements a fixed width vector (32 bytes) of
+ * decimal values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class Decimal256Vector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 32;
+ private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+ private final FieldReader reader;
+
+ private final int precision;
+ private final int scale;
+
+ /**
+ * Instantiate a Decimal256Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public Decimal256Vector(String name, BufferAllocator allocator,
+ int precision, int scale) {
+ this(name, FieldType.nullable(new ArrowType.Decimal(precision, scale, /*bitWidth=*/TYPE_WIDTH * 8)), allocator);
+ }
+
+ /**
+ * Instantiate a Decimal256Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Decimal256Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a Decimal256Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Decimal256Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ ArrowType.Decimal arrowType = (ArrowType.Decimal) field.getFieldType().getType();
+ reader = new Decimal256ReaderImpl(Decimal256Vector.this);
+ this.precision = arrowType.getPrecision();
+ this.scale = arrowType.getScale();
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DECIMAL256;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDecimal256Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.buffer = valueBuffer;
+ holder.precision = precision;
+ holder.scale = scale;
+ holder.start = ((long) index) * TYPE_WIDTH;
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public BigDecimal getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Return precision for the decimal value.
+ */
+ public int getPrecision() {
+ return precision;
+ }
+
+ /**
+ * Return scale for the decimal value.
+ */
+ public int getScale() {
+ return scale;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void set(int index, ArrowBuf buffer) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the decimal element at given index to the provided array of bytes.
+ * Decimal256 is now implemented as Native Endian. This API allows the user
+ * to pass a decimal value in the form of byte array in BE byte order.
+ *
+ * <p>Consumers of Arrow code can use this API instead of first swapping
+ * the source bytes (doing a write and read) and then finally writing to
+ * ArrowBuf of decimal vector.
+ *
+ * <p>This method takes care of adding the necessary padding if the length
+ * of byte array is less then 32 (length of decimal type).
+ *
+ * @param index position of element
+ * @param value array of bytes containing decimal in big endian byte order.
+ */
+ public void setBigEndian(int index, byte[] value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int length = value.length;
+
+ // do the bound check.
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (length == 0) {
+ PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH, (byte) 0);
+ return;
+ }
+ if (LITTLE_ENDIAN) {
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
+ }
+
+ if (length == TYPE_WIDTH) {
+ return;
+ }
+
+ if (length < TYPE_WIDTH) {
+ // sign extend
+ final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
+ return;
+ }
+ } else {
+ if (length <= TYPE_WIDTH) {
+ // copy data from value to outAddress
+ PlatformDependent.copyMemory(value, 0, outAddress + Decimal256Vector.TYPE_WIDTH - length, length);
+ // sign extend
+ final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad);
+ return;
+ }
+ }
+ throw new IllegalArgumentException(
+ "Invalid decimal value length. Valid length in [1 - 32], got " + length);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param start start index of data in the buffer
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void set(int index, long start, ArrowBuf buffer) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, start, TYPE_WIDTH);
+ }
+
+ /**
+ * Sets the element at given index using the buffer whose size maybe <= 32 bytes.
+ * @param index index to write the decimal to
+ * @param start start of value in the buffer
+ * @param buffer contains the decimal in native endian bytes
+ * @param length length of the value in the buffer
+ */
+ public void setSafe(int index, long start, ArrowBuf buffer, int length) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (LITTLE_ENDIAN) {
+ PlatformDependent.copyMemory(inAddress, outAddress, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress + length - 1);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
+ }
+ } else {
+ PlatformDependent.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad);
+ }
+ }
+ }
+
+
+ /**
+ * Sets the element at given index using the buffer whose size maybe <= 32 bytes.
+ * @param index index to write the decimal to
+ * @param start start of value in the buffer
+ * @param buffer contains the decimal in big endian bytes
+ * @param length length of the value in the buffer
+ */
+ public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ // not using buffer.getByte() to avoid boundary checks for every byte.
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (LITTLE_ENDIAN) {
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
+ PlatformDependent.putByte(outAddress + byteIdx, val);
+ }
+ // sign extend
+ if (length < 32) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
+ }
+ } else {
+ PlatformDependent.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad);
+ }
+ }
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value BigDecimal containing decimal value.
+ */
+ public void set(int index, BigDecimal value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ DecimalUtility.checkPrecisionAndScale(value, precision, scale);
+ DecimalUtility.writeBigDecimalToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value long value.
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDecimal256Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, Decimal256Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void setSafe(int index, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, buffer);
+ }
+
+ /**
+ * Same as {@link #setBigEndian(int, byte[])} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ */
+ public void setBigEndianSafe(int index, byte[] value) {
+ handleSafe(index);
+ setBigEndian(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param start start index of data in the buffer
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void setSafe(int index, long start, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, start, buffer);
+ }
+
+ /**
+ * Same as {@link #set(int, BigDecimal)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value BigDecimal containing decimal value.
+ */
+ public void setSafe(int index, BigDecimal value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value long value.
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDecimalHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDecimal256Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, Decimal256Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, Decimal256Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of the value in the buffer
+ * @param buffer buffer containing the value to be stored in the vector
+ */
+ public void set(int index, int isSet, long start, ArrowBuf buffer) {
+ if (isSet > 0) {
+ set(index, start, buffer);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #setSafe(int, int, int, ArrowBuf)} except that it handles
+ * the case when the position of new value is beyond the current value
+ * capacity of the vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of the value in the buffer
+ * @param buffer buffer containing the value to be stored in the vector
+ */
+ public void setSafe(int index, int isSet, long start, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, isSet, start, buffer);
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((Decimal256Vector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ Decimal256Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new Decimal256Vector(ref, allocator, Decimal256Vector.this.precision,
+ Decimal256Vector.this.scale);
+ }
+
+ public TransferImpl(Decimal256Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public Decimal256Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, Decimal256Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
new file mode 100644
index 000000000..f988f4f94
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
@@ -0,0 +1,584 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.math.BigDecimal;
+import java.nio.ByteOrder;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.DecimalReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.DecimalHolder;
+import org.apache.arrow.vector.holders.NullableDecimalHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * DecimalVector implements a fixed width vector (16 bytes) of
+ * decimal values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class DecimalVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 16;
+ private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+ private final FieldReader reader;
+
+ private final int precision;
+ private final int scale;
+
+ /**
+ * Instantiate a DecimalVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public DecimalVector(String name, BufferAllocator allocator,
+ int precision, int scale) {
+ this(name, FieldType.nullable(new ArrowType.Decimal(precision, scale, TYPE_WIDTH * 8)), allocator);
+ }
+
+ /**
+ * Instantiate a DecimalVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DecimalVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a DecimalVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DecimalVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ ArrowType.Decimal arrowType = (ArrowType.Decimal) field.getFieldType().getType();
+ reader = new DecimalReaderImpl(DecimalVector.this);
+ this.precision = arrowType.getPrecision();
+ this.scale = arrowType.getScale();
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DECIMAL;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDecimalHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.buffer = valueBuffer;
+ holder.precision = precision;
+ holder.scale = scale;
+ holder.start = (long) index * TYPE_WIDTH;
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public BigDecimal getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Return precision for the decimal value.
+ */
+ public int getPrecision() {
+ return precision;
+ }
+
+ /**
+ * Return scale for the decimal value.
+ */
+ public int getScale() {
+ return scale;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void set(int index, ArrowBuf buffer) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the decimal element at given index to the provided array of bytes.
+ * Decimal is now implemented as Native Endian. This API allows the user
+ * to pass a decimal value in the form of byte array in BE byte order.
+ *
+ * <p>Consumers of Arrow code can use this API instead of first swapping
+ * the source bytes (doing a write and read) and then finally writing to
+ * ArrowBuf of decimal vector.
+ *
+ * <p>This method takes care of adding the necessary padding if the length
+ * of byte array is less then 16 (length of decimal type).
+ *
+ * @param index position of element
+ * @param value array of bytes containing decimal in big endian byte order.
+ */
+ public void setBigEndian(int index, byte[] value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int length = value.length;
+
+ // do the bound check.
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (length == 0) {
+ PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH, (byte) 0);
+ return;
+ }
+ if (LITTLE_ENDIAN) {
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
+ }
+
+ if (length == TYPE_WIDTH) {
+ return;
+ }
+
+ if (length < TYPE_WIDTH) {
+ // sign extend
+ final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
+ return;
+ }
+ } else {
+ if (length <= TYPE_WIDTH) {
+ // copy data from value to outAddress
+ PlatformDependent.copyMemory(value, 0, outAddress + DecimalVector.TYPE_WIDTH - length, length);
+ // sign extend
+ final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad);
+ return;
+ }
+ }
+ throw new IllegalArgumentException(
+ "Invalid decimal value length. Valid length in [1 - 16], got " + length);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param start start index of data in the buffer
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void set(int index, long start, ArrowBuf buffer) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, start, TYPE_WIDTH);
+ }
+
+ /**
+ * Sets the element at given index using the buffer whose size maybe <= 16 bytes.
+ * @param index index to write the decimal to
+ * @param start start of value in the buffer
+ * @param buffer contains the decimal in native endian bytes
+ * @param length length of the value in the buffer
+ */
+ public void setSafe(int index, long start, ArrowBuf buffer, int length) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (LITTLE_ENDIAN) {
+ PlatformDependent.copyMemory(inAddress, outAddress, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress + length - 1);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
+ }
+ } else {
+ PlatformDependent.copyMemory(inAddress, outAddress + DecimalVector.TYPE_WIDTH - length, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad);
+ }
+ }
+ }
+
+
+ /**
+ * Sets the element at given index using the buffer whose size maybe <= 16 bytes.
+ * @param index index to write the decimal to
+ * @param start start of value in the buffer
+ * @param buffer contains the decimal in big endian bytes
+ * @param length length of the value in the buffer
+ */
+ public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ // not using buffer.getByte() to avoid boundary checks for every byte.
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (LITTLE_ENDIAN) {
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
+ PlatformDependent.putByte(outAddress + byteIdx, val);
+ }
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
+ }
+ } else {
+ PlatformDependent.copyMemory(inAddress, outAddress + DecimalVector.TYPE_WIDTH - length, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad);
+ }
+ }
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value BigDecimal containing decimal value.
+ */
+ public void set(int index, BigDecimal value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ DecimalUtility.checkPrecisionAndScale(value, precision, scale);
+ DecimalUtility.writeBigDecimalToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value long value.
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDecimalHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, DecimalHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void setSafe(int index, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, buffer);
+ }
+
+ /**
+ * Same as {@link #setBigEndian(int, byte[])} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ */
+ public void setBigEndianSafe(int index, byte[] value) {
+ handleSafe(index);
+ setBigEndian(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param start start index of data in the buffer
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void setSafe(int index, long start, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, start, buffer);
+ }
+
+ /**
+ * Same as {@link #set(int, BigDecimal)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value BigDecimal containing decimal value.
+ */
+ public void setSafe(int index, BigDecimal value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value long value.
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDecimalHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDecimalHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, DecimalHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, DecimalHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of the value in the buffer
+ * @param buffer buffer containing the value to be stored in the vector
+ */
+ public void set(int index, int isSet, long start, ArrowBuf buffer) {
+ if (isSet > 0) {
+ set(index, start, buffer);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long, ArrowBuf)} except that it handles
+ * the case when the position of new value is beyond the current value
+ * capacity of the vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of the value in the buffer
+ * @param buffer buffer containing the value to be stored in the vector
+ */
+ public void setSafe(int index, int isSet, long start, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, isSet, start, buffer);
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((DecimalVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ DecimalVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new DecimalVector(ref, allocator, DecimalVector.this.precision,
+ DecimalVector.this.scale);
+ }
+
+ public TransferImpl(DecimalVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public DecimalVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, DecimalVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java
new file mode 100644
index 000000000..c16db40f7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Vector that support density aware initial capacity settings.
+ * We use this for ListVector and VarCharVector as of now to
+ * control the memory allocated.
+ *
+ * <p>For ListVector, we have been using a multiplier of 5
+ * to compute the initial capacity of the inner data vector.
+ * For deeply nested lists and lists with lots of NULL values,
+ * this is over-allocation upfront. So density helps to be
+ * conservative when computing the value capacity of the
+ * inner vector.
+ *
+ * <p>For example, a density value of 10 implies each position in the
+ * list vector has a list of 10 values. So we will provision
+ * an initial capacity of (valuecount * 10) for the inner vector.
+ * A density value of 0.1 implies out of 10 positions in the list vector,
+ * 1 position has a list of size 1 and remaining positions are
+ * null (no lists) or empty lists. This helps in tightly controlling
+ * the memory we provision for inner data vector.
+ *
+ * <p>Similar analogy is applicable for VarCharVector where the capacity
+ * of the data buffer can be controlled using density multiplier
+ * instead of default multiplier of 8 (default size of average
+ * varchar length).
+ *
+ * <p>Also from container vectors, we propagate the density down
+ * the inner vectors so that they can use it appropriately.
+ */
+public interface DensityAwareVector {
+
+ /**
+ * Set value with density.
+ *
+ * @param valueCount the number of values in this vector
+ * @param density the density of the vector
+ */
+ void setInitialCapacity(int valueCount, double density);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java
new file mode 100644
index 000000000..9671b34e0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java
@@ -0,0 +1,406 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static java.util.concurrent.TimeUnit.MICROSECONDS;
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.Duration;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.DurationReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.DurationHolder;
+import org.apache.arrow.vector.holders.NullableDurationHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * DurationVector implements a fixed width vector (8 bytes) of
+ * a configurable TimeUnit granularity duration values which could be null.
+ * A validity buffer (bit vector) is maintained to track which elements in the
+ * vector are null.
+ */
+public final class DurationVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ private final TimeUnit unit;
+
+ /**
+ * Instantiate a DurationVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DurationVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a DurationVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DurationVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new DurationReaderImpl(DurationVector.this);
+ this.unit = ((ArrowType.Duration) field.getFieldType().getType()).getUnit();
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DURATION;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDurationHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = get(valueBuffer, index);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Duration getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long value = get(valueBuffer, index);
+ return toDuration(value, unit);
+ }
+ }
+
+ /**
+ * Converts the given value and unit to the appropriate {@link Duration}.
+ */
+ public static Duration toDuration(long value, TimeUnit unit) {
+ switch (unit) {
+ case SECOND:
+ return Duration.ofSeconds(value);
+ case MILLISECOND:
+ return Duration.ofMillis(value);
+ case NANOSECOND:
+ return Duration.ofNanos(value);
+ case MICROSECOND:
+ return Duration.ofNanos(MICROSECONDS.toNanos(value));
+ default:
+ throw new IllegalArgumentException("Unknown timeunit: " + unit);
+ }
+ }
+
+ /**
+ * Get the Interval value at a given index as a {@link StringBuilder} object.
+ *
+ * @param index position of the element
+ * @return String Builder object with Interval in java.time.Duration format.
+ */
+ public StringBuilder getAsStringBuilder(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getAsStringBuilderHelper(index);
+ }
+ }
+
+ private StringBuilder getAsStringBuilderHelper(int index) {
+ return new StringBuilder(getObject(index).toString());
+ }
+
+ /**
+ * Gets the time unit of the duration.
+ */
+ public TimeUnit getUnit() {
+ return unit;
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, ArrowBuf value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, value, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value The duration value (in the timeunit associated with this vector)
+ */
+ public void set(int index, long value) {
+ final long offsetIndex = (long) index * TYPE_WIDTH;
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setLong(offsetIndex, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDurationHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ set(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, DurationHolder holder) {
+ set(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, ArrowBuf value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value duration in the time unit this vector was constructed with
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDurationHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDurationHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, DurationHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, DurationHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value The duration value (in the TimeUnit associated with this vector).
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value The duration value (in the timeunit associated with this vector)
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((DurationVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ DurationVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new DurationVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(DurationVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public DurationVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, DurationVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java
new file mode 100644
index 000000000..f37a50100
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.util.ArrowBufPointer;
+
+/**
+ * Vector for which each data element resides in a continuous memory region,
+ * so it can be pointed to by an {@link org.apache.arrow.memory.util.ArrowBufPointer}.
+ */
+public interface ElementAddressableVector extends ValueVector {
+
+ /**
+ * Gets the pointer for the data at the given index.
+ * @param index the index for the data.
+ * @return the pointer to the data.
+ */
+ ArrowBufPointer getDataPointer(int index);
+
+ /**
+ * Gets the pointer for the data at the given index.
+ * @param index the index for the data.
+ * @param reuse the data pointer to fill, this avoids creating a new pointer object.
+ * @return the pointer to the data, it should be the same one as the input parameter
+ */
+ ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java
new file mode 100644
index 000000000..2041227fc
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A vector that wraps an underlying vector, used to help implement extension types.
+ * @param <T> The wrapped vector type.
+ */
+public abstract class ExtensionTypeVector<T extends ValueVector & FieldVector> extends BaseValueVector implements
+ FieldVector {
+
+ private final T underlyingVector;
+ private final String name;
+
+ /**
+ * Instantiate an extension type vector.
+ * @param name name of the vector
+ * @param allocator allocator for memory management
+ * @param underlyingVector underlying filed vector
+ */
+ public ExtensionTypeVector(String name, BufferAllocator allocator, T underlyingVector) {
+ super(allocator);
+ Preconditions.checkNotNull(underlyingVector, "underlyingVector can not be null.");
+ this.name = name;
+ this.underlyingVector = underlyingVector;
+ }
+
+ /**
+ * Instantiate an extension type vector.
+ * @param field field materialized by this vector.
+ * @param allocator allocator for memory management
+ * @param underlyingVector underlying filed vector
+ */
+ public ExtensionTypeVector(Field field, BufferAllocator allocator, T underlyingVector) {
+ this(field.getName(), allocator, underlyingVector);
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ /** Get the underlying vector. */
+ public T getUnderlyingVector() {
+ return underlyingVector;
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ this.underlyingVector.allocateNew();
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ return this.underlyingVector.allocateNewSafe();
+ }
+
+ @Override
+ public void reAlloc() {
+ this.underlyingVector.reAlloc();
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ this.underlyingVector.setInitialCapacity(numRecords);
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return this.underlyingVector.getValueCapacity();
+ }
+
+ @Override
+ public void reset() {
+ this.underlyingVector.reset();
+ }
+
+ @Override
+ public Field getField() {
+ return this.underlyingVector.getField();
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.EXTENSIONTYPE;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return underlyingVector.getTransferPair(ref, allocator);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return underlyingVector.getTransferPair(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return underlyingVector.makeTransferPair(target);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return underlyingVector.getReader();
+ }
+
+ @Override
+ public int getBufferSize() {
+ return underlyingVector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ return underlyingVector.getBufferSizeFor(valueCount);
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ return underlyingVector.getBuffers(clear);
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return underlyingVector.getValidityBuffer();
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ return underlyingVector.getDataBuffer();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return underlyingVector.getOffsetBuffer();
+ }
+
+ @Override
+ public int getValueCount() {
+ return underlyingVector.getValueCount();
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ underlyingVector.setValueCount(valueCount);
+ }
+
+ /**
+ * Get the extension object at the specified index.
+ *
+ * <p>Generally, this should access the underlying vector and construct the corresponding Java object from the raw
+ * data.
+ */
+ @Override
+ public abstract Object getObject(int index);
+
+ @Override
+ public int getNullCount() {
+ return underlyingVector.getNullCount();
+ }
+
+ @Override
+ public boolean isNull(int index) {
+ return underlyingVector.isNull(index);
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ underlyingVector.initializeChildrenFromFields(children);
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return underlyingVector.getChildrenFromFields();
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ underlyingVector.loadFieldBuffers(fieldNode, ownBuffers);
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ return underlyingVector.getFieldBuffers();
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ return underlyingVector.getFieldInnerVectors();
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return underlyingVector.getValidityBufferAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ return underlyingVector.getDataBufferAddress();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ return underlyingVector.getOffsetBufferAddress();
+ }
+
+ @Override
+ public void clear() {
+ underlyingVector.clear();
+ }
+
+ @Override
+ public void close() {
+ underlyingVector.close();
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return underlyingVector.getTransferPair(allocator);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return underlyingVector.iterator();
+ }
+
+ @Override
+ public BufferAllocator getAllocator() {
+ return underlyingVector.getAllocator();
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java
new file mode 100644
index 000000000..b00581a04
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * A vector corresponding to a Field in the schema.
+ * It has inner vectors backed by buffers (validity, offsets, data, ...)
+ */
+public interface FieldVector extends ValueVector {
+
+ /**
+ * Initializes the child vectors
+ * to be later loaded with loadBuffers.
+ *
+ * @param children the schema
+ */
+ void initializeChildrenFromFields(List<Field> children);
+
+ /**
+ * The returned list is the same size as the list passed to initializeChildrenFromFields.
+ *
+ * @return the children according to schema (empty for primitive types)
+ */
+ List<FieldVector> getChildrenFromFields();
+
+ /**
+ * Loads data in the vectors.
+ * (ownBuffers must be the same size as getFieldVectors())
+ *
+ * @param fieldNode the fieldNode
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers);
+
+ /**
+ * Get the buffers of the fields, (same size as getFieldVectors() since it is their content).
+ *
+ * @return the buffers containing the data for this vector (ready for reading)
+ */
+ List<ArrowBuf> getFieldBuffers();
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ List<BufferBacked> getFieldInnerVectors();
+
+ /**
+ * Gets the starting address of the underlying buffer associated with validity vector.
+ *
+ * @return buffer address
+ */
+ long getValidityBufferAddress();
+
+ /**
+ * Gets the starting address of the underlying buffer associated with data vector.
+ *
+ * @return buffer address
+ */
+ long getDataBufferAddress();
+
+ /**
+ * Gets the starting address of the underlying buffer associated with offset vector.
+ *
+ * @return buffer address
+ */
+ long getOffsetBufferAddress();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
new file mode 100644
index 000000000..e1847e4bb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
@@ -0,0 +1,386 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.complex.impl.FixedSizeBinaryReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
+import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * FixedSizeBinaryVector implements a fixed width vector of
+ * binary values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public class FixedSizeBinaryVector extends BaseFixedWidthVector {
+ private final int byteWidth;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a FixedSizeBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ * @param byteWidth byte width of the binary values
+ */
+ public FixedSizeBinaryVector(String name, BufferAllocator allocator, int byteWidth) {
+ this(name, FieldType.nullable(new FixedSizeBinary(byteWidth)), allocator);
+ }
+
+ /**
+ * Instantiate a FixedSizeBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public FixedSizeBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a FixedSizeBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public FixedSizeBinaryVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, ((FixedSizeBinary) field.getFieldType().getType()).getByteWidth());
+ reader = new FixedSizeBinaryReaderImpl(FixedSizeBinaryVector.this);
+ byteWidth = ((FixedSizeBinary) field.getFieldType().getType()).getByteWidth();
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FIXEDSIZEBINARY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ final byte[] dst = new byte[byteWidth];
+ valueBuffer.getBytes((long) index * byteWidth, dst, 0, byteWidth);
+ return dst;
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ * @param holder nullable holder to carry the buffer
+ */
+ public void get(int index, NullableFixedSizeBinaryHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.buffer = valueBuffer.slice((long) index * byteWidth, byteWidth);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ @Override
+ public byte[] getObject(int index) {
+ return get(index);
+ }
+
+ public int getByteWidth() {
+ return byteWidth;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /** Sets the value at index to the provided one. */
+ public void set(int index, byte[] value) {
+ assert index >= 0;
+ Preconditions.checkNotNull(value, "expecting a valid byte array");
+ assert byteWidth <= value.length;
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * byteWidth, value, 0, byteWidth);
+ }
+
+ /**
+ * Same as {@link #set(int, byte[])} but reallocates if <code>index</code>
+ * is larger than capacity.
+ */
+ public void setSafe(int index, byte[] value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Sets the value if isSet is positive, otherwise sets the index to null/invalid.
+ */
+ public void set(int index, int isSet, byte[] value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ public void setSafe(int index, int isSet, byte[] value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing binary value.
+ */
+ public void set(int index, ArrowBuf buffer) {
+ assert index >= 0;
+ assert byteWidth <= buffer.capacity();
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * byteWidth, buffer, 0, byteWidth);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing binary value.
+ */
+ public void setSafe(int index, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, buffer);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing binary value.
+ */
+ public void set(int index, int isSet, ArrowBuf buffer) {
+ if (isSet > 0) {
+ set(index, buffer);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing binary value.
+ */
+ public void setSafe(int index, int isSet, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, isSet, buffer);
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, FixedSizeBinaryHolder holder) {
+ assert holder.byteWidth == byteWidth;
+ set(index, holder.buffer);
+ }
+
+ /**
+ * Same as {@link #set(int, FixedSizeBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, FixedSizeBinaryHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableFixedSizeBinaryHolder holder) {
+ assert holder.byteWidth == byteWidth;
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException("holder has a negative isSet value");
+ } else if (holder.isSet > 0) {
+ set(index, holder.buffer);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, NullableFixedSizeBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableFixedSizeBinaryHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static byte[] get(final ArrowBuf buffer, final int index, final int byteWidth) {
+ final byte[] dst = new byte[byteWidth];
+ buffer.getBytes((long) index * byteWidth, dst, 0, byteWidth);
+ return dst;
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((FixedSizeBinaryVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ FixedSizeBinaryVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new FixedSizeBinaryVector(ref, allocator, FixedSizeBinaryVector.this.byteWidth);
+ }
+
+ public TransferImpl(FixedSizeBinaryVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public FixedSizeBinaryVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, FixedSizeBinaryVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java
new file mode 100644
index 000000000..58effeecb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface for all fixed width {@link ElementAddressableVector} (e.g. integer, fixed size binary, etc).
+ */
+public interface FixedWidthVector extends ElementAddressableVector {
+
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ *
+ * @param valueCount Number of values in the vector.
+ */
+ void allocateNew(int valueCount);
+
+ /**
+ * Zero out the underlying buffer backing this vector.
+ */
+ void zeroVector();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java
new file mode 100644
index 000000000..365a1529b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java
@@ -0,0 +1,361 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.Float4ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.Float4Holder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Float4Vector implements a fixed width vector (4 bytes) of
+ * float values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class Float4Vector extends BaseFixedWidthVector implements FloatingPointVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a Float4Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public Float4Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.FLOAT4.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a Float4Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float4Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a Float4Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float4Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new Float4ReaderImpl(Float4Vector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FLOAT4;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public float get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getFloat((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableFloat4Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getFloat((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Float getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getFloat((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, float value) {
+ valueBuffer.setFloat((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, float value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableFloat4Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, Float4Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, float)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, float value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableFloat4Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableFloat4Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, Float4Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, Float4Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, float value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, float)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, float value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static float get(final ArrowBuf buffer, final int index) {
+ return buffer.getFloat((long) index * TYPE_WIDTH);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, double value) {
+ set(index, (float) value);
+ }
+
+ @Override
+ public void setSafeWithPossibleTruncate(int index, double value) {
+ setSafe(index, (float) value);
+ }
+
+ @Override
+ public double getValueAsDouble(int index) {
+ return get(index);
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((Float4Vector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ Float4Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new Float4Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(Float4Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public Float4Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, Float4Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java
new file mode 100644
index 000000000..948390d46
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java
@@ -0,0 +1,362 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.Float8ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.Float8Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Float8Vector implements a fixed width vector (8 bytes) of
+ * double values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class Float8Vector extends BaseFixedWidthVector implements FloatingPointVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a Float8Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public Float8Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.FLOAT8.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a Float8Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float8Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a Float8Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float8Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new Float8ReaderImpl(Float8Vector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FLOAT8;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public double get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getDouble((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableFloat8Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getDouble((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Double getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getDouble((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, double value) {
+ valueBuffer.setDouble((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, double value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableFloat8Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, Float8Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, double)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, double value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableFloat8Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableFloat8Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, Float8Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, Float8Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, double value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, double)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, double value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static double get(final ArrowBuf buffer, final int index) {
+ return buffer.getDouble((long) index * TYPE_WIDTH);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, double value) {
+ set(index, value);
+ }
+
+ @Override
+ public void setSafeWithPossibleTruncate(int index, double value) {
+ setSafe(index, value);
+ }
+
+ @Override
+ public double getValueAsDouble(int index) {
+ return get(index);
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((Float8Vector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ Float8Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new Float8Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(Float8Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public Float8Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, Float8Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java
new file mode 100644
index 000000000..4c5143de6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * The interface for vectors with floating point values.
+ */
+public interface FloatingPointVector extends ValueVector {
+
+ /**
+ * Sets the value at the given index, note this value may be truncated internally.
+ * @param index the index to set.
+ * @param value the value to set.
+ */
+ void setWithPossibleTruncate(int index, double value);
+
+ /**
+ * Sets the value at the given index, note this value may be truncated internally.
+ * Any expansion/reallocation is handled automatically.
+ * @param index the index to set.
+ * @param value the value to set.
+ */
+ void setSafeWithPossibleTruncate(int index, double value);
+
+ /**
+ * Gets the value at the given index.
+ * @param index the index to retrieve the value.
+ * @return the value at the index.
+ */
+ double getValueAsDouble(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java
new file mode 100644
index 000000000..3da915541
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.math.BigDecimal;
+import java.nio.charset.Charset;
+
+/**
+ * Helper class to generate test data for Nullable fixed and variable
+ * width scalar vectors. Previous implementations of java vector classes
+ * provided generateTestData(now deprecated) API to populate the vector
+ * with sample data. This class should be used for that purpose.
+ */
+public class GenerateSampleData {
+ private GenerateSampleData() {}
+
+ /** Populates <code>vector</code> with <code>valueCount</code> random values. */
+ public static void generateTestData(final ValueVector vector, final int valueCount) {
+ if (vector instanceof IntVector) {
+ writeIntData((IntVector) vector, valueCount);
+ } else if (vector instanceof DecimalVector) {
+ writeDecimalData((DecimalVector) vector, valueCount);
+ } else if (vector instanceof BitVector) {
+ writeBooleanData((BitVector) vector, valueCount);
+ } else if (vector instanceof VarCharVector) {
+ writeVarCharData((VarCharVector) vector, valueCount);
+ } else if (vector instanceof VarBinaryVector) {
+ writeVarBinaryData((VarBinaryVector) vector, valueCount);
+ } else if (vector instanceof BigIntVector) {
+ writeBigIntData((BigIntVector) vector, valueCount);
+ } else if (vector instanceof Float4Vector) {
+ writeFloatData((Float4Vector) vector, valueCount);
+ } else if (vector instanceof Float8Vector) {
+ writeDoubleData((Float8Vector) vector, valueCount);
+ } else if (vector instanceof DateDayVector) {
+ writeDateDayData((DateDayVector) vector, valueCount);
+ } else if (vector instanceof DateMilliVector) {
+ writeDateMilliData((DateMilliVector) vector, valueCount);
+ } else if (vector instanceof IntervalDayVector) {
+ writeIntervalDayData((IntervalDayVector) vector, valueCount);
+ } else if (vector instanceof IntervalYearVector) {
+ writeIntervalYearData((IntervalYearVector) vector, valueCount);
+ } else if (vector instanceof SmallIntVector) {
+ writeSmallIntData((SmallIntVector) vector, valueCount);
+ } else if (vector instanceof TinyIntVector) {
+ writeTinyIntData((TinyIntVector) vector, valueCount);
+ } else if (vector instanceof TimeMicroVector) {
+ writeTimeMicroData((TimeMicroVector) vector, valueCount);
+ } else if (vector instanceof TimeMilliVector) {
+ writeTimeMilliData((TimeMilliVector) vector, valueCount);
+ } else if (vector instanceof TimeNanoVector) {
+ writeTimeNanoData((TimeNanoVector) vector, valueCount);
+ } else if (vector instanceof TimeSecVector) {
+ writeTimeSecData((TimeSecVector) vector, valueCount);
+ } else if (vector instanceof TimeStampSecVector) {
+ writeTimeStampData((TimeStampSecVector) vector, valueCount);
+ } else if (vector instanceof TimeStampMicroVector) {
+ writeTimeStampData((TimeStampMicroVector) vector, valueCount);
+ } else if (vector instanceof TimeStampMilliVector) {
+ writeTimeStampData((TimeStampMilliVector) vector, valueCount);
+ } else if (vector instanceof TimeStampNanoVector) {
+ writeTimeStampData((TimeStampNanoVector) vector, valueCount);
+ } else if (vector instanceof TimeStampSecTZVector) {
+ writeTimeStampData((TimeStampSecTZVector) vector, valueCount);
+ } else if (vector instanceof TimeStampMicroTZVector) {
+ writeTimeStampData((TimeStampMicroTZVector) vector, valueCount);
+ } else if (vector instanceof TimeStampMilliTZVector) {
+ writeTimeStampData((TimeStampMilliTZVector) vector, valueCount);
+ } else if (vector instanceof TimeStampNanoTZVector) {
+ writeTimeStampData((TimeStampNanoTZVector) vector, valueCount);
+ }
+ }
+
+ private static void writeTimeStampData(TimeStampVector vector, int valueCount) {
+ final long even = 100000;
+ final long odd = 200000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeDecimalData(DecimalVector vector, int valueCount) {
+ final BigDecimal even = new BigDecimal(0.0543278923);
+ final BigDecimal odd = new BigDecimal(2.0543278923);
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeIntData(IntVector vector, int valueCount) {
+ final int even = 1000;
+ final int odd = 2000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeBooleanData(BitVector vector, int valueCount) {
+ final int even = 0;
+ final int odd = 1;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeIntervalYearData(IntervalYearVector vector, int valueCount) {
+ final int even = 1;
+ final int odd = 2;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeIntervalDayData(IntervalDayVector vector, int valueCount) {
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, 1, 50);
+ } else {
+ vector.setSafe(i, 2, 100);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeTimeSecData(TimeSecVector vector, int valueCount) {
+ final int even = 500;
+ final int odd = 900;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeTimeMilliData(TimeMilliVector vector, int valueCount) {
+ final int even = 1000;
+ final int odd = 2000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeTimeMicroData(TimeMicroVector vector, int valueCount) {
+ final long even = 1000000000;
+ final long odd = 2000000000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+
+ }
+
+ private static void writeTimeNanoData(TimeNanoVector vector, int valueCount) {
+ final long even = 1000000000;
+ final long odd = 2000000000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeDateDayData(DateDayVector vector, int valueCount) {
+ final int even = 1000;
+ final int odd = 2000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeDateMilliData(DateMilliVector vector, int valueCount) {
+ final long even = 1000000000;
+ final long odd = 2000000000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeSmallIntData(SmallIntVector vector, int valueCount) {
+ final short even = 10;
+ final short odd = 20;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeTinyIntData(TinyIntVector vector, int valueCount) {
+ final byte even = 1;
+ final byte odd = 2;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeBigIntData(BigIntVector vector, int valueCount) {
+ final long even = 1000000000;
+ final long odd = 2000000000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeFloatData(Float4Vector vector, int valueCount) {
+ final float even = 20.3f;
+ final float odd = 40.2f;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeDoubleData(Float8Vector vector, int valueCount) {
+ final double even = 20.2373;
+ final double odd = 40.2378;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeVarBinaryData(VarBinaryVector vector, int valueCount) {
+ Charset utf8Charset = Charset.forName("UTF-8");
+ final byte[] even = "AAAAA1".getBytes(utf8Charset);
+ final byte[] odd = "BBBBBBBBB2".getBytes(utf8Charset);
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeVarCharData(VarCharVector vector, int valueCount) {
+ Charset utf8Charset = Charset.forName("UTF-8");
+ final byte[] even = "AAAAA1".getBytes(utf8Charset);
+ final byte[] odd = "BBBBBBBBB2".getBytes(utf8Charset);
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java
new file mode 100644
index 000000000..e591ec1e8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java
@@ -0,0 +1,362 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.IntReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.IntHolder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * IntVector implements a fixed width (4 bytes) vector of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class IntVector extends BaseFixedWidthVector implements BaseIntVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a IntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public IntVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.INT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a IntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a IntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new IntReaderImpl(IntVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.INT;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableIntHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Integer getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableIntHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, IntHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableIntHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, IntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, IntHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int get(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((IntVector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ private class TransferImpl implements TransferPair {
+ IntVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new IntVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(IntVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public IntVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, IntVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java
new file mode 100644
index 000000000..0dc860e6b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java
@@ -0,0 +1,433 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.Duration;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.IntervalDayReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.IntervalDayHolder;
+import org.apache.arrow.vector.holders.NullableIntervalDayHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * IntervalDayVector implements a fixed width vector (8 bytes) of
+ * interval (days and milliseconds) values which could be null.
+ * A validity buffer (bit vector) is maintained to track which elements in the
+ * vector are null.
+ */
+public final class IntervalDayVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private static final byte MILLISECOND_OFFSET = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a IntervalDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalDayVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.INTERVALDAY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalDayVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalDayVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new IntervalDayReaderImpl(IntervalDayVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.INTERVALDAY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Given a data buffer, get the number of days stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return day value stored at the index.
+ */
+ public static int getDays(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Given a data buffer, get the get the number of milliseconds stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return milliseconds value stored at the index.
+ */
+ public static int getMilliseconds(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH + MILLISECOND_OFFSET);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableIntervalDayHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ final long startIndex = (long) index * TYPE_WIDTH;
+ holder.isSet = 1;
+ holder.days = valueBuffer.getInt(startIndex);
+ holder.milliseconds = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Duration getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long startIndex = (long) index * TYPE_WIDTH;
+ final int days = valueBuffer.getInt(startIndex);
+ final int milliseconds = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET);
+ return Duration.ofDays(days).plusMillis(milliseconds);
+ }
+ }
+
+ /**
+ * Get the Interval value at a given index as a {@link StringBuilder} object.
+ *
+ * @param index position of the element
+ * @return String Builder object with Interval value as
+ * [days, hours, minutes, seconds, millis]
+ */
+ public StringBuilder getAsStringBuilder(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getAsStringBuilderHelper(index);
+ }
+ }
+
+ private StringBuilder getAsStringBuilderHelper(int index) {
+ final long startIndex = (long) index * TYPE_WIDTH;
+
+ final int days = valueBuffer.getInt(startIndex);
+ int millis = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET);
+
+ final int hours = millis / (org.apache.arrow.vector.util.DateUtility.hoursToMillis);
+ millis = millis % (org.apache.arrow.vector.util.DateUtility.hoursToMillis);
+
+ final int minutes = millis / (org.apache.arrow.vector.util.DateUtility.minutesToMillis);
+ millis = millis % (org.apache.arrow.vector.util.DateUtility.minutesToMillis);
+
+ final int seconds = millis / (org.apache.arrow.vector.util.DateUtility.secondsToMillis);
+ millis = millis % (org.apache.arrow.vector.util.DateUtility.secondsToMillis);
+
+ final String dayString = (Math.abs(days) == 1) ? " day " : " days ";
+
+ return (new StringBuilder()
+ .append(days).append(dayString)
+ .append(hours).append(":")
+ .append(minutes).append(":")
+ .append(seconds).append(".")
+ .append(millis));
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, ArrowBuf value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, value, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param days days for the interval
+ * @param milliseconds milliseconds for the interval
+ */
+ public void set(int index, int days, int milliseconds) {
+ final long offsetIndex = (long) index * TYPE_WIDTH;
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setInt(offsetIndex, days);
+ valueBuffer.setInt((offsetIndex + MILLISECOND_OFFSET), milliseconds);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableIntervalDayHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ set(index, holder.days, holder.milliseconds);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, IntervalDayHolder holder) {
+ set(index, holder.days, holder.milliseconds);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, ArrowBuf value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param days days for the interval
+ * @param milliseconds milliseconds for the interval
+ */
+ public void setSafe(int index, int days, int milliseconds) {
+ handleSafe(index);
+ set(index, days, milliseconds);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableIntervalDayHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableIntervalDayHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, IntervalDayHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, IntervalDayHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param days days component of interval
+ * @param milliseconds millisecond component of interval
+ */
+ public void set(int index, int isSet, int days, int milliseconds) {
+ if (isSet > 0) {
+ set(index, days, milliseconds);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param days days component of interval
+ * @param milliseconds millisecond component of interval
+ */
+ public void setSafe(int index, int isSet, int days, int milliseconds) {
+ handleSafe(index);
+ set(index, isSet, days, milliseconds);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((IntervalDayVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ IntervalDayVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new IntervalDayVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(IntervalDayVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public IntervalDayVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, IntervalDayVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java
new file mode 100644
index 000000000..ba3a26a89
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java
@@ -0,0 +1,442 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.Duration;
+import java.time.Period;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.IntervalMonthDayNanoReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.IntervalMonthDayNanoHolder;
+import org.apache.arrow.vector.holders.NullableIntervalMonthDayNanoHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * IntervalMonthDayNanoVector implements a fixed width vector (16 bytes) of
+ * interval (month, days and nanoseconds) values which could be null.
+ * A validity buffer (bit vector) is maintained to track which elements in the
+ * vector are null.
+ *
+ * Month, day and nanoseconds are indepndent from one another and there
+ * is no specific limits imposed on their values.
+ */
+public final class IntervalMonthDayNanoVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 16;
+ private static final byte DAY_OFFSET = 4;
+ private static final byte NANOSECOND_OFFSET = 8;
+ private final FieldReader reader;
+
+
+ /**
+ * Instantiate a IntervalMonthDayNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalMonthDayNanoVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.INTERVALDAY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalMonthDayNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalMonthDayNanoVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalMonthDayNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalMonthDayNanoVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new IntervalMonthDayNanoReaderImpl(IntervalMonthDayNanoVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.INTERVALMONTHDAYNANO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Given a data buffer, get the number of months stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return day value stored at the index.
+ */
+ public static int getMonths(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /**
+ * Given a data buffer, get the number of days stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return day value stored at the index.
+ */
+ public static int getDays(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH + DAY_OFFSET);
+ }
+
+ /**
+ * Given a data buffer, get the get the number of nanoseconds stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return nanoseconds value stored at the index.
+ */
+ public static long getNanoseconds(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH + NANOSECOND_OFFSET);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableIntervalMonthDayNanoHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ final long startIndex = (long) index * TYPE_WIDTH;
+ holder.isSet = 1;
+ holder.months = valueBuffer.getInt(startIndex);
+ holder.days = valueBuffer.getInt(startIndex + DAY_OFFSET);
+ holder.nanoseconds = valueBuffer.getLong(startIndex + NANOSECOND_OFFSET);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public PeriodDuration getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long startIndex = (long) index * TYPE_WIDTH;
+ final int months = valueBuffer.getInt(startIndex);
+ final int days = valueBuffer.getInt(startIndex + DAY_OFFSET);
+ final long nanoseconds = valueBuffer.getLong(startIndex + NANOSECOND_OFFSET);
+
+ return new PeriodDuration(Period.ofMonths(months).plusDays(days),
+ Duration.ofNanos(nanoseconds));
+ }
+ }
+
+ /**
+ * Get the Interval value at a given index as a {@link StringBuilder} object.
+ *
+ * @param index position of the element
+ * @return String Builder object with Interval value as
+ */
+ public StringBuilder getAsStringBuilder(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getAsStringBuilderHelper(index);
+ }
+ }
+
+ private StringBuilder getAsStringBuilderHelper(int index) {
+ return new StringBuilder().append(getObject(index).toString()).append(" ");
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, ArrowBuf value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, value, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param months months component of interval
+ * @param days days component of interval
+ * @param nanoseconds nanosecond component of interval
+ */
+ public void set(int index, int months, int days, long nanoseconds) {
+ final long offsetIndex = (long) index * TYPE_WIDTH;
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setInt(offsetIndex, months);
+ valueBuffer.setInt(offsetIndex + DAY_OFFSET, days);
+ valueBuffer.setLong((offsetIndex + NANOSECOND_OFFSET), nanoseconds);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableIntervalMonthDayNanoHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ set(index, holder.months, holder.days, holder.nanoseconds);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, IntervalMonthDayNanoHolder holder) {
+ set(index, holder.months, holder.days, holder.nanoseconds);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, ArrowBuf value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param months months for the interval
+ * @param days days for the interval
+ * @param nanoseconds nanoseconds for the interval
+ */
+ public void setSafe(int index, int months, int days, long nanoseconds) {
+ handleSafe(index);
+ set(index, months, days, nanoseconds);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableIntervalMonthDayNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableIntervalMonthDayNanoHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, IntervalMonthDayNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, IntervalMonthDayNanoHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param months months component of interval
+ * @param days days component of interval
+ * @param nanoseconds nanosecond component of interval
+ */
+ public void set(int index, int isSet, int months, int days, long nanoseconds) {
+ if (isSet > 0) {
+ set(index, months, days, nanoseconds);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param months months component of interval
+ * @param days days component of interval
+ * @param nanoseconds nanosecond component of interval
+ */
+ public void setSafe(int index, int isSet, int months, int days,
+ long nanoseconds) {
+ handleSafe(index);
+ set(index, isSet, months, days, nanoseconds);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((IntervalMonthDayNanoVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ IntervalMonthDayNanoVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new IntervalMonthDayNanoVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(IntervalMonthDayNanoVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public IntervalMonthDayNanoVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, IntervalMonthDayNanoVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java
new file mode 100644
index 000000000..7ddfe6b78
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java
@@ -0,0 +1,382 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.Period;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.IntervalYearReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.IntervalYearHolder;
+import org.apache.arrow.vector.holders.NullableIntervalYearHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * IntervalYearVector implements a fixed width (4 bytes) vector of
+ * interval (years and months) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class IntervalYearVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a IntervalYearVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalYearVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.INTERVALYEAR.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalYearVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalYearVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalYearVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalYearVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new IntervalYearReaderImpl(IntervalYearVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.INTERVALYEAR;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int getTotalMonths(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableIntervalYearHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Period getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final int interval = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ // TODO: verify interval is in months
+ return Period.ofMonths(interval);
+ }
+ }
+
+ /**
+ * Get the Interval value at a given index as a {@link StringBuilder} object.
+ *
+ * @param index position of the element
+ * @return String Builder object with Interval value as
+ * [years, months]
+ */
+ public StringBuilder getAsStringBuilder(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getAsStringBuilderHelper(index);
+ }
+ }
+
+ private StringBuilder getAsStringBuilderHelper(int index) {
+ int value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+
+ final int years = (value / org.apache.arrow.vector.util.DateUtility.yearsToMonths);
+ final int months = (value % org.apache.arrow.vector.util.DateUtility.yearsToMonths);
+
+ final String yearString = (Math.abs(years) == 1) ? " year " : " years ";
+ final String monthString = (Math.abs(months) == 1) ? " month " : " months ";
+
+ return (new StringBuilder()
+ .append(years)
+ .append(yearString)
+ .append(months)
+ .append(monthString));
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableIntervalYearHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, IntervalYearHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableIntervalYearHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableIntervalYearHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, IntervalYearHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, IntervalYearHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((IntervalYearVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ IntervalYearVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new IntervalYearVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(IntervalYearVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public IntervalYearVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, IntervalYearVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
new file mode 100644
index 000000000..e9d60b38e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
@@ -0,0 +1,305 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.LargeVarBinaryReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.LargeVarBinaryHolder;
+import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * LargeVarBinaryVector implements a large variable width vector of binary
+ * values which could be NULL. A validity buffer (bit vector) is maintained
+ * to track which elements in the vector are null.
+ * The size of the underlying buffer can be over 2GB.
+ */
+public final class LargeVarBinaryVector extends BaseLargeVariableWidthVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarBinaryVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.LARGEVARBINARY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarBinaryVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new LargeVarBinaryReaderImpl(LargeVarBinaryVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LARGEVARBINARY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the variable length element at specified index as byte array.
+ *
+ * @param index position of element to get
+ * @return array of bytes for non-null element, null otherwise
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final long startOffset = getStartOffset(index);
+ final int dataLength =
+ (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - startOffset);
+ final byte[] result = new byte[dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return byte array for non-null element, null otherwise
+ */
+ public byte[] getObject(int index) {
+ return get(index);
+ }
+
+ /**
+ * Get the variable length element at specified index and sets the state
+ * in provided holder.
+ *
+ * @param index position of element to get
+ * @param holder data holder to be populated by this function
+ */
+ public void get(int index, NullableLargeVarBinaryHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.start = getStartOffset(index);
+ holder.end = offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+ holder.buffer = valueBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, LargeVarBinaryHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int dataLength = (int) (holder.end - holder.start);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, LargeVarBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, LargeVarBinaryHolder holder) {
+ assert index >= 0;
+ final int dataLength = (int) (holder.end - holder.start);
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableLargeVarBinaryHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ final long startOffset = getStartOffset(index);
+ if (holder.isSet != 0) {
+ final int dataLength = (int) (holder.end - holder.start);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset);
+ }
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, NullableLargeVarBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableLargeVarBinaryHolder holder) {
+ assert index >= 0;
+ if (holder.isSet != 0) {
+ final int dataLength = (int) (holder.end - holder.start);
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ fillEmpties(index + 1);
+ }
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ lastSet = index;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((LargeVarBinaryVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ LargeVarBinaryVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new LargeVarBinaryVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(LargeVarBinaryVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public LargeVarBinaryVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, LargeVarBinaryVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
new file mode 100644
index 000000000..fd2057260
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
@@ -0,0 +1,331 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.LargeVarCharReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.LargeVarCharHolder;
+import org.apache.arrow.vector.holders.NullableLargeVarCharHolder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * LargeVarCharVector implements a variable width vector of VARCHAR
+ * values which could be NULL. A validity buffer (bit vector) is maintained
+ * to track which elements in the vector are null.
+ * <p>
+ * The offset width of this vector is 8, so the underlying buffer can be larger than 2GB.
+ * </p>
+ */
+public final class LargeVarCharVector extends BaseLargeVariableWidthVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a LargeVarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarCharVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(Types.MinorType.LARGEVARCHAR.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a LargeVarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarCharVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a LargeVarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarCharVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new LargeVarCharReaderImpl(LargeVarCharVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public Types.MinorType getMinorType() {
+ return Types.MinorType.LARGEVARCHAR;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the variable length element at specified index as byte array.
+ *
+ * @param index position of element to get
+ * @return array of bytes for non-null element, null otherwise
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final long startOffset = getStartOffset(index);
+ final int dataLength =
+ (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - startOffset);
+ final byte[] result = new byte[dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return Text object for non-null element, null otherwise
+ */
+ public Text getObject(int index) {
+ byte[] b = get(index);
+ if (b == null) {
+ return null;
+ } else {
+ return new Text(b);
+ }
+ }
+
+ /**
+ * Get the variable length element at specified index and sets the state
+ * in provided holder.
+ *
+ * @param index position of element to get
+ * @param holder data holder to be populated by this function
+ */
+ public void get(int index, NullableLargeVarCharHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.start = getStartOffset(index);
+ holder.end = offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+ holder.buffer = valueBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, LargeVarCharHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int dataLength = (int) (holder.end - holder.start);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, LargeVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, LargeVarCharHolder holder) {
+ assert index >= 0;
+ final int dataLength = (int) (holder.end - holder.start);
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableLargeVarCharHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ final long startOffset = getStartOffset(index);
+ if (holder.isSet != 0) {
+ final int dataLength = (int) (holder.end - holder.start);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset);
+ }
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, NullableLargeVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableLargeVarCharHolder holder) {
+ assert index >= 0;
+ if (holder.isSet != 0) {
+ final int dataLength = (int) (holder.end - holder.start);
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ fillHoles(index + 1);
+ }
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the
+ * content in supplied Text.
+ *
+ * @param index position of the element to set
+ * @param text Text object with data
+ */
+ public void set(int index, Text text) {
+ set(index, text.getBytes(), 0, text.getLength());
+ }
+
+ /**
+ * Same as {@link #set(int, NullableLargeVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set.
+ * @param text Text object with data
+ */
+ public void setSafe(int index, Text text) {
+ setSafe(index, text.getBytes(), 0, text.getLength());
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new LargeVarCharVector.TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new LargeVarCharVector.TransferImpl((LargeVarCharVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ LargeVarCharVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new LargeVarCharVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(LargeVarCharVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public LargeVarCharVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, LargeVarCharVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java
new file mode 100644
index 000000000..9961c72a4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Configuration class to determine if null checking should be enabled or disabled for the "get" methods.
+ * For example, the get method of class org.apache.arrow.vector.Float8Vector first checks if the value
+ * at the given index is null, before retrieving the value. This configuration will turn on and off such checks.
+ *
+ * <p>Null checking is on by default. You can disable it by setting either the system property or the
+ * environmental variable to "false". The system property is named "arrow.enable_null_check_for_get" and
+ * the environmental variable is named "ARROW_ENABLE_NULL_CHECK_FOR_GET".
+ * When both the system property and the environmental variable are set, the system property takes precedence.
+ * </p>
+ * <p>
+ * Disabling null-checking in the "get" methods may lead to performance improvements.
+ * For example, suppose we have the following micro-benchmark:
+ * </p>
+ * <p>
+ * <pre>{@code
+ *
+ * Float8Vector vector = ...
+ *
+ * public void test() {
+ * sum = 0;
+ * for (int i = 0; i < 1024; i++) {
+ * vector.set(i, i + 10.0);
+ * safeSum += vector.get(i);
+ * }
+ * }
+ *
+ * }</pre>
+ * </p>
+ * <p>
+ * Performance evaluations of the micro-benchmark with the JMH framework reveal that, disabling null checking
+ * has the following effects:
+ * 1. The amounts of byte code and assembly code generated by JIT are both smaller.
+ * 2. The performance improves by about 30% (2.819 ± 0.005 us/op vs. 4.069 ± 0.004 us/op).
+ * </p>
+ * <p>
+ * Therefore, for scenarios where the user can be sure that the null-checking is unnecessary,
+ * it is beneficial to disable it with this configuration.
+ * </p>
+ */
+public class NullCheckingForGet {
+
+ /**
+ * The flag to indicate if null checking is enabled for "get" methods.
+ */
+ public static final boolean NULL_CHECKING_ENABLED;
+
+ static {
+ String envProperty = System.getenv("ARROW_ENABLE_NULL_CHECK_FOR_GET");
+ String sysProperty = System.getProperty("arrow.enable_null_check_for_get");
+
+ // The system property has a higher priority than the environmental variable.
+ String flagValue = sysProperty;
+ if (flagValue == null) {
+ flagValue = envProperty;
+ }
+
+ // The flag is set to false only if the system property/environmental
+ // variable is explicitly set to "false".
+ NULL_CHECKING_ENABLED = !"false".equals(flagValue);
+ }
+
+ private NullCheckingForGet() {
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
new file mode 100644
index 000000000..1010d8d47
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
@@ -0,0 +1,338 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME;
+
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.NullReader;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A null type vector.
+ */
+public class NullVector implements FieldVector {
+
+ private int valueCount;
+
+ protected Field field;
+
+ /**
+ * Instantiate a NullVector.
+ *
+ * @param name name of the vector
+ */
+ public NullVector(String name) {
+ this(name, FieldType.nullable(Types.MinorType.NULL.getType()));
+ }
+
+ /**
+ * Instantiate a NullVector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector.
+ */
+ public NullVector(String name, FieldType fieldType) {
+ this(new Field(name, fieldType, null));
+ }
+
+ /**
+ * Instantiate a NullVector.
+ *
+ * @param field field materialized by this vector.
+ */
+ public NullVector(Field field) {
+ this.valueCount = 0;
+ this.field = field;
+ }
+
+ @Deprecated
+ public NullVector() {
+ this(new Field(DATA_VECTOR_NAME, FieldType.nullable(new ArrowType.Null()), null));
+ }
+
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public void clear() {
+ }
+
+ @Override
+ public void reset() {
+ }
+
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ @Override
+ public Types.MinorType getMinorType() {
+ return Types.MinorType.NULL;
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(null, allocator);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.emptyIterator();
+ }
+
+ @Override
+ public int getBufferSize() {
+ return 0;
+ }
+
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ return 0;
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ return new ArrowBuf[0];
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ allocateNewSafe();
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ return true;
+ }
+
+ @Override
+ public void reAlloc() {
+ }
+
+ @Override
+ public BufferAllocator getAllocator() {
+ throw new UnsupportedOperationException("Tried to get allocator from NullVector");
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return this.valueCount;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl();
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return getTransferPair(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((NullVector) target);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return NullReader.INSTANCE;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (!children.isEmpty()) {
+ throw new IllegalArgumentException("Null vector has no children");
+ }
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return Collections.emptyList();
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ Preconditions.checkArgument(ownBuffers.isEmpty(), "Null vector has no buffers");
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ return Collections.emptyList();
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int getValueCount() {
+ return this.valueCount;
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ }
+
+ @Override
+ public Object getObject(int index) {
+ return null;
+ }
+
+ @Override
+ public int getNullCount() {
+ return this.valueCount;
+ }
+
+ @Override
+ public boolean isNull(int index) {
+ return true;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return 31 * valueCount;
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return 31 * valueCount;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String getName() {
+ return this.getField().getName();
+ }
+
+ private class TransferImpl implements TransferPair {
+ NullVector to;
+
+ public TransferImpl(String ref) {
+ to = new NullVector(ref);
+ }
+
+ @Deprecated
+ public TransferImpl() {
+ to = new NullVector();
+ }
+
+ public TransferImpl(NullVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public NullVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ to.valueCount = valueCount;
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ to.valueCount = length;
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ if (toIndex > to.valueCount) {
+ to.valueCount = toIndex;
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java
new file mode 100644
index 000000000..ee48fe797
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.time.Duration;
+import java.time.Period;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Combination of Period and Duration for representing this interval type
+ * as a POJO.
+ */
+public class PeriodDuration {
+ private final Period period;
+ private final Duration duration;
+
+ public PeriodDuration(Period period, Duration duration) {
+ this.period = Preconditions.checkNotNull(period);
+ this.duration = Preconditions.checkNotNull(duration);
+ }
+
+ public Period getPeriod() {
+ return period;
+ }
+
+ public Duration getDuration() {
+ return duration;
+ }
+
+ @Override
+ public String toString() {
+ return period.toString() + " " + duration.toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof PeriodDuration)) {
+ return false;
+ }
+ PeriodDuration other = (PeriodDuration) o;
+ return this.period.equals(other.period) && this.duration.equals(other.duration);
+ }
+
+ @Override
+ public int hashCode() {
+ return this.period.hashCode() * 31 + this.duration.hashCode();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java
new file mode 100644
index 000000000..b61e4a160
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.vector.util.CallBack;
+
+
+/**
+ * Callback for when the Schema for the Vector changes (generally happens when a vector is promoted to a union type
+ * from a single value type).
+ */
+public class SchemaChangeCallBack implements CallBack {
+ private boolean schemaChanged = false;
+
+ /**
+ * Constructs a schema-change callback with the schema-changed state set to
+ * {@code false}.
+ */
+ public SchemaChangeCallBack() {
+ }
+
+ /**
+ * Sets the schema-changed state to {@code true}.
+ */
+ @Override
+ public void doWork() {
+ schemaChanged = true;
+ }
+
+ /**
+ * Returns the value of schema-changed state, <strong>resetting</strong> the
+ * schema-changed state to {@code false}.
+ *
+ * @return the previous schema-changed state
+ */
+ public boolean getSchemaChangedAndReset() {
+ final boolean current = schemaChanged;
+ schemaChanged = false;
+ return current;
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java
new file mode 100644
index 000000000..1de6dea90
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java
@@ -0,0 +1,389 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.SmallIntReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableSmallIntHolder;
+import org.apache.arrow.vector.holders.SmallIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * SmallIntVector implements a fixed width (2 bytes) vector of
+ * short values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class SmallIntVector extends BaseFixedWidthVector implements BaseIntVector {
+ public static final byte TYPE_WIDTH = 2;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a SmallIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public SmallIntVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.SMALLINT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a SmallIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public SmallIntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a SmallIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public SmallIntVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new SmallIntReaderImpl(SmallIntVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.SMALLINT;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public short get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableSmallIntHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Short getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setShort((long) index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, short value) {
+ valueBuffer.setShort((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, short value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableSmallIntHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, SmallIntHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, short)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, short value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableSmallIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableSmallIntHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, SmallIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, SmallIntHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, short value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, short)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, short value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static short get(final ArrowBuf buffer, final int index) {
+ return buffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((SmallIntVector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ private class TransferImpl implements TransferPair {
+ SmallIntVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new SmallIntVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(SmallIntVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public SmallIntVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, SmallIntVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java
new file mode 100644
index 000000000..cf128859e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeMicroReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeMicroHolder;
+import org.apache.arrow.vector.holders.TimeMicroHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeMicroVector implements a fixed width vector (8 bytes) of
+ * time (microsecond resolution) values which could be null.
+ * A validity buffer (bit vector) is maintained to track which elements in the
+ * vector are null.
+ */
+public final class TimeMicroVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMicroVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMEMICRO.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMicroVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMicroVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TimeMicroReaderImpl(TimeMicroVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMEMICRO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeMicroHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeMicroHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeMicroHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeMicroHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeMicroHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeMicroHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeMicroHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeMicroVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TimeMicroVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TimeMicroVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TimeMicroVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeMicroVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeMicroVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java
new file mode 100644
index 000000000..b96990b10
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeMilliReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeMilliHolder;
+import org.apache.arrow.vector.holders.TimeMilliHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeMilliVector implements a fixed width (4 bytes) vector of
+ * time (millisecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeMilliVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMilliVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMEMILLI.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMilliVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMilliVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TimeMilliReaderImpl(TimeMilliVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMEMILLI;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeMilliHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final int millis = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ // TODO: this doesn't seem right, time not from epoch
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeMilliHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeMilliHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeMilliHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeMilliHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int get(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeMilliVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TimeMilliVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TimeMilliVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TimeMilliVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeMilliVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeMilliVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java
new file mode 100644
index 000000000..bc78a0264
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeNanoReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeNanoHolder;
+import org.apache.arrow.vector.holders.TimeNanoHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeNanoVector implements a fixed width vector (8 bytes) of
+ * time (nanosecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeNanoVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeNanoVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMENANO.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeNanoVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeNanoVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TimeNanoReaderImpl(TimeNanoVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMENANO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeNanoHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeNanoHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeNanoHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeNanoHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeNanoHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeNanoVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TimeNanoVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TimeNanoVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TimeNanoVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeNanoVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeNanoVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java
new file mode 100644
index 000000000..29b7381be
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java
@@ -0,0 +1,348 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeSecReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeSecHolder;
+import org.apache.arrow.vector.holders.TimeSecHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeSecVector implements a fixed width (4 bytes) vector of
+ * time (seconds resolution) values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class TimeSecVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeSecVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESEC.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeSecVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeSecVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TimeSecReaderImpl(TimeSecVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESEC;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeSecHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Integer getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeSecHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeSecHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeSecHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeSecHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeSecHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeSecHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int get(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeSecVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TimeSecVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TimeSecVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TimeSecVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeSecVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeSecVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java
new file mode 100644
index 000000000..17715780e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampMicroTZReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampMicroTZHolder;
+import org.apache.arrow.vector.holders.TimeStampMicroTZHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampMicroTZVector implements a fixed width vector (8 bytes) of
+ * timestamp (microsecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampMicroTZVector extends TimeStampVector {
+ private final FieldReader reader;
+ private final String timeZone;
+
+ /**
+ * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroTZVector(String name, BufferAllocator allocator, String timeZone) {
+ this(name, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MICROSECOND, timeZone)), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroTZVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampMicroTZReaderImpl(TimeStampMicroTZVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroTZVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampMicroTZReaderImpl(TimeStampMicroTZVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPMICROTZ;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampMicroTZHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampMicroTZHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampMicroTZHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampMicroTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampMicroTZHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampMicroTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampMicroTZHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampMicroTZVector to = new TimeStampMicroTZVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampMicroTZVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java
new file mode 100644
index 000000000..5cbef8962
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampMicroReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampMicroHolder;
+import org.apache.arrow.vector.holders.TimeStampMicroHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampMicroVector implements a fixed width vector (8 bytes) of
+ * timestamp (microsecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampMicroVector extends TimeStampVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESTAMPMICRO.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ reader = new TimeStampMicroReaderImpl(TimeStampMicroVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new TimeStampMicroReaderImpl(TimeStampMicroVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPMICRO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampMicroHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long micros = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ return DateUtility.getLocalDateTimeFromEpochMicro(micros);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampMicroHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampMicroHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampMicroHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampMicroHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampMicroHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampMicroHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampMicroVector to = new TimeStampMicroVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampMicroVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java
new file mode 100644
index 000000000..e66bbf450
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampMilliTZReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder;
+import org.apache.arrow.vector.holders.TimeStampMilliTZHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampMilliTZVector implements a fixed width vector (8 bytes) of
+ * timestamp (millisecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampMilliTZVector extends TimeStampVector {
+ private final FieldReader reader;
+ private final String timeZone;
+
+ /**
+ * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliTZVector(String name, BufferAllocator allocator, String timeZone) {
+ this(name, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, timeZone)), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliTZVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampMilliTZReaderImpl(TimeStampMilliTZVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliTZVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampMilliTZReaderImpl(TimeStampMilliTZVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPMILLITZ;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampMilliTZHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampMilliTZHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampMilliTZHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampMilliTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampMilliTZHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampMilliTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampMilliTZHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampMilliTZVector to = new TimeStampMilliTZVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampMilliTZVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java
new file mode 100644
index 000000000..8f46f5606
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampMilliReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder;
+import org.apache.arrow.vector.holders.TimeStampMilliHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampMilliVector implements a fixed width vector (8 bytes) of
+ * timestamp (millisecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampMilliVector extends TimeStampVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESTAMPMILLI.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ reader = new TimeStampMilliReaderImpl(TimeStampMilliVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new TimeStampMilliReaderImpl(TimeStampMilliVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPMILLI;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampMilliHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long millis = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampMilliHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampMilliHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampMilliHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampMilliHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampMilliVector to = new TimeStampMilliVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampMilliVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java
new file mode 100644
index 000000000..a3e582a7c
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampNanoTZReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder;
+import org.apache.arrow.vector.holders.TimeStampNanoTZHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampNanoTZVector implements a fixed width vector (8 bytes) of
+ * timestamp (nanosecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampNanoTZVector extends TimeStampVector {
+ private final FieldReader reader;
+ private final String timeZone;
+
+ /**
+ * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoTZVector(String name, BufferAllocator allocator, String timeZone) {
+ this(name, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.NANOSECOND, timeZone)), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoTZVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampNanoTZReaderImpl(TimeStampNanoTZVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoTZVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampNanoTZReaderImpl(TimeStampNanoTZVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPNANOTZ;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampNanoTZHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampNanoTZHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampNanoTZHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampNanoTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(
+ int index,
+ NullableTimeStampNanoTZHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampNanoTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampNanoTZHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampNanoTZVector to = new TimeStampNanoTZVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampNanoTZVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java
new file mode 100644
index 000000000..7b87dac43
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampNanoReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampNanoHolder;
+import org.apache.arrow.vector.holders.TimeStampNanoHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampNanoVector implements a fixed width vector (8 bytes) of
+ * timestamp (nanosecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampNanoVector extends TimeStampVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESTAMPNANO.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ reader = new TimeStampNanoReaderImpl(TimeStampNanoVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new TimeStampNanoReaderImpl(TimeStampNanoVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPNANO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampNanoHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long nanos = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ return DateUtility.getLocalDateTimeFromEpochNano(nanos);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampNanoHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampNanoHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampNanoHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampNanoHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampNanoVector to = new TimeStampNanoVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampNanoVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java
new file mode 100644
index 000000000..f5a0498fe
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampSecTZReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampSecTZHolder;
+import org.apache.arrow.vector.holders.TimeStampSecTZHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampSecTZVector implements a fixed width vector (8 bytes) of
+ * timestamp (seconds resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampSecTZVector extends TimeStampVector {
+ private final FieldReader reader;
+ private final String timeZone;
+
+ /**
+ * Instantiate a TimeStampSecTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecTZVector(String name, BufferAllocator allocator, String timeZone) {
+ this(name, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.SECOND, timeZone)), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampSecTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecTZVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampSecTZReaderImpl(TimeStampSecTZVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampSecTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecTZVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampSecTZReaderImpl(TimeStampSecTZVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPSECTZ;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampSecTZHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampSecTZHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampSecTZHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampSecTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampSecTZHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampSecTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampSecTZHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampSecTZVector to = new TimeStampSecTZVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampSecTZVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java
new file mode 100644
index 000000000..f12e19684
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampSecReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampSecHolder;
+import org.apache.arrow.vector.holders.TimeStampSecHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampSecVector implements a fixed width vector (8 bytes) of
+ * timestamp (seconds resolution) values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class TimeStampSecVector extends TimeStampVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeStampSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESTAMPSEC.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ reader = new TimeStampSecReaderImpl(TimeStampSecVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new TimeStampSecReaderImpl(TimeStampSecVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPSEC;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampSecHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long secs = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ final long millis = java.util.concurrent.TimeUnit.SECONDS.toMillis(secs);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampSecHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampSecHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampSecHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampSecHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampSecHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampSecHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampSecVector to = new TimeStampSecVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampSecVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java
new file mode 100644
index 000000000..d85a793fb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampVector is an abstract interface for fixed width vector (8 bytes)
+ * of timestamp values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public abstract class TimeStampVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+
+ /**
+ * Instantiate a TimeStampVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ protected void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * {@link TransferPair} for {@link TimeStampVector}.
+ */
+ public class TransferImpl implements TransferPair {
+ TimeStampVector to;
+
+ public TransferImpl(TimeStampVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeStampVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeStampVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java
new file mode 100644
index 000000000..f08b0e02f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java
@@ -0,0 +1,390 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TinyIntReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTinyIntHolder;
+import org.apache.arrow.vector.holders.TinyIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TinyIntVector implements a fixed width (1 bytes) vector of
+ * byte values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class TinyIntVector extends BaseFixedWidthVector implements BaseIntVector {
+ public static final byte TYPE_WIDTH = 1;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TinyIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TinyIntVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TINYINT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TinyIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TinyIntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TinyIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TinyIntVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TinyIntReaderImpl(TinyIntVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TINYINT;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public byte get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTinyIntHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Byte getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ private void setValue(int index, int value) {
+ valueBuffer.setByte(index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, byte value) {
+ valueBuffer.setByte(index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, byte value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTinyIntHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TinyIntHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, byte)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, byte value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTinyIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTinyIntHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TinyIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TinyIntHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, byte value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, byte)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, byte value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static byte get(final ArrowBuf buffer, final int index) {
+ return buffer.getByte(index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TinyIntVector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TinyIntVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TinyIntVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TinyIntVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TinyIntVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TinyIntVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
new file mode 100644
index 000000000..60fe2a6a6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
@@ -0,0 +1,448 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static java.util.Arrays.asList;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BufferLayout.BufferType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
+import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
+import org.apache.arrow.vector.types.pojo.ArrowType.Date;
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+import org.apache.arrow.vector.types.pojo.ArrowType.Duration;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
+import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8;
+import org.apache.arrow.vector.types.pojo.ArrowType.Map;
+import org.apache.arrow.vector.types.pojo.ArrowType.Null;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Time;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+
+/**
+ * The buffer layout of vectors for a given type.
+ * It defines its own buffers followed by the buffers for the children
+ * if it is a nested type (Struct_, List, Union)
+ */
+public class TypeLayout {
+
+ /**
+ * Constructs a new {@TypeLayout} for the given <code>arrowType</code>.
+ */
+ public static TypeLayout getTypeLayout(final ArrowType arrowType) {
+ TypeLayout layout = arrowType.accept(new ArrowTypeVisitor<TypeLayout>() {
+
+ @Override
+ public TypeLayout visit(Int type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth()));
+ }
+
+ @Override
+ public TypeLayout visit(Union type) {
+ List<BufferLayout> vectors;
+ switch (type.getMode()) {
+ case Dense:
+ vectors = asList(
+ BufferLayout.typeBuffer(),
+ BufferLayout.offsetBuffer() // offset to find the vector
+ );
+ break;
+ case Sparse:
+ vectors = asList(
+ BufferLayout.typeBuffer() // type of the value at the index or 0 if null
+ );
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode());
+ }
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(Struct type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(Timestamp type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64));
+ }
+
+ @Override
+ public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector(),
+ BufferLayout.offsetBuffer()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(ArrowType.LargeList type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector(),
+ BufferLayout.largeOffsetBuffer()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(FixedSizeList type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(Map type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector(),
+ BufferLayout.offsetBuffer()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(FloatingPoint type) {
+ int bitWidth;
+ switch (type.getPrecision()) {
+ case HALF:
+ bitWidth = 16;
+ break;
+ case SINGLE:
+ bitWidth = 32;
+ break;
+ case DOUBLE:
+ bitWidth = 64;
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported Precision: " + type.getPrecision());
+ }
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(bitWidth));
+ }
+
+ @Override
+ public TypeLayout visit(Decimal type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth()));
+ }
+
+ @Override
+ public TypeLayout visit(FixedSizeBinary type) {
+ return newFixedWidthTypeLayout(new BufferLayout(BufferType.DATA, type.getByteWidth() * 8));
+ }
+
+ @Override
+ public TypeLayout visit(Bool type) {
+ return newFixedWidthTypeLayout(BufferLayout.booleanVector());
+ }
+
+ @Override
+ public TypeLayout visit(Binary type) {
+ return newVariableWidthTypeLayout();
+ }
+
+ @Override
+ public TypeLayout visit(Utf8 type) {
+ return newVariableWidthTypeLayout();
+ }
+
+ @Override
+ public TypeLayout visit(LargeUtf8 type) {
+ return newLargeVariableWidthTypeLayout();
+ }
+
+ @Override
+ public TypeLayout visit(LargeBinary type) {
+ return newLargeVariableWidthTypeLayout();
+ }
+
+ private TypeLayout newVariableWidthTypeLayout() {
+ return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.offsetBuffer(),
+ BufferLayout.byteVector());
+ }
+
+ private TypeLayout newLargeVariableWidthTypeLayout() {
+ return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.largeOffsetBuffer(),
+ BufferLayout.byteVector());
+ }
+
+ private TypeLayout newPrimitiveTypeLayout(BufferLayout... vectors) {
+ return new TypeLayout(asList(vectors));
+ }
+
+ public TypeLayout newFixedWidthTypeLayout(BufferLayout dataVector) {
+ return newPrimitiveTypeLayout(BufferLayout.validityVector(), dataVector);
+ }
+
+ @Override
+ public TypeLayout visit(Null type) {
+ return new TypeLayout(Collections.<BufferLayout>emptyList());
+ }
+
+ @Override
+ public TypeLayout visit(Date type) {
+ switch (type.getUnit()) {
+ case DAY:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32));
+ case MILLISECOND:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64));
+ default:
+ throw new UnsupportedOperationException("Unknown unit " + type.getUnit());
+ }
+ }
+
+ @Override
+ public TypeLayout visit(Time type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth()));
+ }
+
+ @Override
+ public TypeLayout visit(Interval type) {
+ switch (type.getUnit()) {
+ case DAY_TIME:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64));
+ case YEAR_MONTH:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32));
+ case MONTH_DAY_NANO:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(128));
+ default:
+ throw new UnsupportedOperationException("Unknown unit " + type.getUnit());
+ }
+ }
+
+ @Override
+ public TypeLayout visit(Duration type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64));
+ }
+
+ });
+ return layout;
+ }
+
+ /**
+ * Gets the number of {@link BufferLayout}s for the given <code>arrowType</code>.
+ */
+ public static int getTypeBufferCount(final ArrowType arrowType) {
+ return arrowType.accept(new ArrowTypeVisitor<Integer>() {
+
+ /**
+ * All fixed width vectors have a common number of buffers 2: one validity buffer, plus a data buffer.
+ */
+ static final int FIXED_WIDTH_BUFFER_COUNT = 2;
+
+ /**
+ * All variable width vectors have a common number of buffers 3: a validity buffer,
+ * an offset buffer, and a data buffer.
+ */
+ static final int VARIABLE_WIDTH_BUFFER_COUNT = 3;
+
+ @Override
+ public Integer visit(Int type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Union type) {
+ switch (type.getMode()) {
+ case Dense:
+ // TODO: validate this
+ return 2;
+ case Sparse:
+ // type buffer
+ return 1;
+ default:
+ throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode());
+ }
+ }
+
+ @Override
+ public Integer visit(Struct type) {
+ // validity buffer
+ return 1;
+ }
+
+ @Override
+ public Integer visit(Timestamp type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+ // validity buffer + offset buffer
+ return 2;
+ }
+
+ @Override
+ public Integer visit(ArrowType.LargeList type) {
+ // validity buffer + offset buffer
+ return 2;
+ }
+
+ @Override
+ public Integer visit(FixedSizeList type) {
+ // validity buffer
+ return 1;
+ }
+
+ @Override
+ public Integer visit(Map type) {
+ // validity buffer + offset buffer
+ return 2;
+ }
+
+ @Override
+ public Integer visit(FloatingPoint type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Decimal type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(FixedSizeBinary type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Bool type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Binary type) {
+ return VARIABLE_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Utf8 type) {
+ return VARIABLE_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(LargeUtf8 type) {
+ return VARIABLE_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(LargeBinary type) {
+ return VARIABLE_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Null type) {
+ return 0;
+ }
+
+ @Override
+ public Integer visit(Date type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Time type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Interval type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Duration type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ });
+ }
+
+ private final List<BufferLayout> bufferLayouts;
+
+ public TypeLayout(List<BufferLayout> bufferLayouts) {
+ super();
+ this.bufferLayouts = Preconditions.checkNotNull(bufferLayouts);
+ }
+
+ public TypeLayout(BufferLayout... bufferLayouts) {
+ this(asList(bufferLayouts));
+ }
+
+ /**
+ * Returns the individual {@linkplain BufferLayout}s for the given type.
+ */
+ public List<BufferLayout> getBufferLayouts() {
+ return bufferLayouts;
+ }
+
+ /**
+ * Returns the types of each buffer for this layout. A layout can consist
+ * of multiple buffers for example a validity bitmap buffer, a value buffer or
+ * an offset buffer.
+ */
+ public List<BufferType> getBufferTypes() {
+ List<BufferType> types = new ArrayList<>(bufferLayouts.size());
+ for (BufferLayout vector : bufferLayouts) {
+ types.add(vector.getType());
+ }
+ return types;
+ }
+
+ public String toString() {
+ return bufferLayouts.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ return bufferLayouts.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ TypeLayout other = (TypeLayout) obj;
+ return bufferLayouts.equals(other.bufferLayouts);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
new file mode 100644
index 000000000..bd9a732c1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
@@ -0,0 +1,368 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.UInt1ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableUInt1Holder;
+import org.apache.arrow.vector.holders.UInt1Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * UInt1Vector implements a fixed width (1 bytes) vector of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class UInt1Vector extends BaseFixedWidthVector implements BaseIntVector {
+ /**
+ * The mask to use when promoting the unsigned byte value to an integer.
+ */
+ public static final int PROMOTION_MASK = 0xFF;
+
+ /**
+ * The maximum 8-bit unsigned integer.
+ */
+ public static final byte MAX_UINT1 = (byte) 0XFF;
+
+ public static final byte TYPE_WIDTH = 1;
+ private final FieldReader reader;
+
+ public UInt1Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.UINT1.getType()), allocator);
+ }
+
+ public UInt1Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ public UInt1Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new UInt1ReaderImpl(UInt1Vector.this);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UINT1;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>To avoid overflow, the returned type is one step up from the signed
+ * type.
+ *
+ * <p>This method is mainly meant for integration tests.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static short getNoOverflow(final ArrowBuf buffer, final int index) {
+ byte b = buffer.getByte(index * TYPE_WIDTH);
+ return (short) (PROMOTION_MASK & b);
+ }
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public byte get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableUInt1Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Byte getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Returns the value stored at index without the potential for overflow.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Short getObjectNoOverflow(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getNoOverflow(valueBuffer, index);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setByte(index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, byte value) {
+ valueBuffer.setByte(index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, byte value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableUInt1Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, UInt1Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, byte)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, byte value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableUInt1Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableUInt1Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, UInt1Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, UInt1Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Sets the value at index to value isSet > 0, otherwise sets the index position
+ * to invalid/null.
+ */
+ public void set(int index, int isSet, byte value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, byte)} but will reallocate the buffer if index
+ * is larger than current capacity.
+ */
+ public void setSafe(int index, int isSet, byte value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((UInt1Vector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index) & PROMOTION_MASK;
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i));
+ }
+
+ private class TransferImpl implements TransferPair {
+ UInt1Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new UInt1Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(UInt1Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public UInt1Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, UInt1Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
new file mode 100644
index 000000000..5c29ab6b3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.UInt2ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableUInt2Holder;
+import org.apache.arrow.vector.holders.UInt2Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * UInt2Vector implements a fixed width (2 bytes) vector of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class UInt2Vector extends BaseFixedWidthVector implements BaseIntVector {
+
+ /**
+ * The maximum 16-bit unsigned integer.
+ */
+ public static final char MAX_UINT2 = (char) 0XFFFF;
+
+ public static final byte TYPE_WIDTH = 2;
+ private final FieldReader reader;
+
+ public UInt2Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.UINT2.getType()), allocator);
+ }
+
+ public UInt2Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ public UInt2Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new UInt2ReaderImpl(UInt2Vector.this);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UINT2;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method is mainly meant for integration tests.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static char get(final ArrowBuf buffer, final int index) {
+ return buffer.getChar((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public char get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getChar((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableUInt2Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getChar((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Character getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getChar((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setChar((long) index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, char value) {
+ valueBuffer.setChar((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, char value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableUInt2Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, UInt2Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, char)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, char value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableUInt2Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableUInt2Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, UInt2Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, UInt2Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Sets the given index to value is isSet is positive, otherwise sets
+ * the position as invalid/null.
+ */
+ public void set(int index, int isSet, char value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, char)} but will reallocate the buffer if index
+ * is larger than current capacity.
+ */
+ public void setSafe(int index, int isSet, char value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((UInt2Vector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) ->
+ v.isNull(i) ? "null" : Integer.toString(v.get(i) & 0x0000ffff));
+ }
+
+ private class TransferImpl implements TransferPair {
+ UInt2Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new UInt2Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(UInt2Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public UInt2Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, UInt2Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
new file mode 100644
index 000000000..cc954d67d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
@@ -0,0 +1,340 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.UInt4ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.holders.UInt4Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * UInt4Vector implements a fixed width (4 bytes) vector of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class UInt4Vector extends BaseFixedWidthVector implements BaseIntVector {
+
+ /**
+ * The mask to use when promoting the unsigned int value to a long int.
+ */
+ public static final long PROMOTION_MASK = 0x00000000FFFFFFFFL;
+
+ /**
+ * The maximum 32-bit unsigned integer.
+ */
+ public static final int MAX_UINT4 = 0XFFFFFFFF;
+
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ public UInt4Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.UINT4.getType()), allocator);
+ }
+
+ public UInt4Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ public UInt4Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new UInt4ReaderImpl(UInt4Vector.this);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UINT4;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>To avoid overflow, the returned type is one step up from the signed
+ * type.
+ *
+ * <p>This method is mainly meant for integration tests.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long getNoOverflow(final ArrowBuf buffer, final int index) {
+ long l = buffer.getInt((long) index * TYPE_WIDTH);
+ return PROMOTION_MASK & l;
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableUInt4Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Integer getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObjectNoOverflow(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getNoOverflow(valueBuffer, index);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableUInt4Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, UInt4Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableUInt4Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableUInt4Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, UInt4Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, UInt4Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Sets the value at index to value isSet > 0, otherwise sets the index position
+ * to invalid/null.
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} but will reallocate if the buffer if index
+ * is larger than the current capacity.
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((UInt4Vector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index) & PROMOTION_MASK;
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i));
+ }
+
+ private class TransferImpl implements TransferPair {
+ UInt4Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new UInt4Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(UInt4Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public UInt4Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, UInt4Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java
new file mode 100644
index 000000000..98eaf25a6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java
@@ -0,0 +1,336 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.math.BigInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.UInt8ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableUInt8Holder;
+import org.apache.arrow.vector.holders.UInt8Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * UInt8Vector implements a fixed width vector (8 bytes) of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class UInt8Vector extends BaseFixedWidthVector implements BaseIntVector {
+
+ /**
+ * The maximum 64-bit unsigned long integer.
+ */
+ public static final long MAX_UINT8 = 0XFFFFFFFFFFFFFFFFL;
+
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ public UInt8Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.UINT8.getType()), allocator);
+ }
+
+ public UInt8Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ public UInt8Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new UInt8ReaderImpl(UInt8Vector.this);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UINT8;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+ private static final BigInteger SAFE_CONVERSION_MASK = new BigInteger("ffffffffffffffff", 16);
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>To avoid overflow, the returned type is one step up from the signed
+ * type.
+ *
+ * <p>This method is mainly meant for integration tests.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static BigInteger getNoOverflow(final ArrowBuf buffer, final int index) {
+ BigInteger l = BigInteger.valueOf(buffer.getLong((long) index * TYPE_WIDTH));
+ return SAFE_CONVERSION_MASK.and(l);
+ }
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableUInt8Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Returns the value stored at index without the potential for overflow.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public BigInteger getObjectNoOverflow(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getNoOverflow(valueBuffer, index);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableUInt8Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, UInt8Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableUInt8Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableUInt8Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, UInt8Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, UInt8Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /** Sets value at index is isSet is positive otherwise sets the index to invalid/null. */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} but will reallocate if index is greater than current capacity.
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((UInt8Vector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i));
+ }
+
+ private class TransferImpl implements TransferPair {
+ UInt8Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new UInt8Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(UInt8Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public UInt8Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, UInt8Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
new file mode 100644
index 000000000..aa29c2931
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.io.Closeable;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * An abstraction that is used to store a sequence of values in an individual column.
+ *
+ * <p>A {@link ValueVector value vector} stores underlying data in-memory in a columnar fashion that is compact and
+ * efficient. The column whose data is stored, is referred by {@link #getField()}.
+ *
+ * <p>It is important that vector is allocated before attempting to read or write.
+ *
+ * <p>There are a few "rules" around vectors:
+ *
+ * <ul>
+ * <li>values need to be written in order (e.g. index 0, 1, 2, 5)</li>
+ * <li>null vectors start with all values as null before writing anything</li>
+ * <li>for variable width types, the offset vector should be all zeros before writing</li>
+ * <li>you must call setValueCount before a vector can be read</li>
+ * <li>you should never write to a vector once it has been read.</li>
+ * </ul>
+ *
+ * <p>Please note that the current implementation doesn't enforce those rules, hence we may find few places that
+ * deviate from these rules (e.g. offset vectors in Variable Length and Repeated vector)
+ *
+ * <p>This interface "should" strive to guarantee this order of operation:
+ * <blockquote>
+ * allocate &gt; mutate &gt; setvaluecount &gt; access &gt; clear (or allocate to start the process over).
+ * </blockquote>
+ */
+public interface ValueVector extends Closeable, Iterable<ValueVector> {
+ /**
+ * Allocate new buffers. ValueVector implements logic to determine how much to allocate.
+ *
+ * @throws OutOfMemoryException Thrown if no memory can be allocated.
+ */
+ void allocateNew() throws OutOfMemoryException;
+
+ /**
+ * Allocates new buffers. ValueVector implements logic to determine how much to allocate.
+ *
+ * @return Returns true if allocation was successful.
+ */
+ boolean allocateNewSafe();
+
+ /**
+ * Allocate new buffer with double capacity, and copy data into the new buffer.
+ * Replace vector's buffer with new buffer, and release old one
+ */
+ void reAlloc();
+
+ BufferAllocator getAllocator();
+
+ /**
+ * Set the initial record capacity.
+ *
+ * @param numRecords the initial record capacity.
+ */
+ void setInitialCapacity(int numRecords);
+
+ /**
+ * Returns the maximum number of values that can be stored in this vector instance.
+ *
+ * @return the maximum number of values that can be stored in this vector instance.
+ */
+ int getValueCapacity();
+
+ /**
+ * Alternative to clear(). Allows use as an AutoCloseable in try-with-resources.
+ */
+ @Override
+ void close();
+
+ /**
+ * Release any owned ArrowBuf and reset the ValueVector to the initial state. If the
+ * vector has any child vectors, they will also be cleared.
+ */
+ void clear();
+
+ /**
+ * Reset the ValueVector to the initial state without releasing any owned ArrowBuf.
+ * Buffer capacities will remain unchanged and any previous data will be zeroed out.
+ * This includes buffers for data, validity, offset, etc. If the vector has any
+ * child vectors, they will also be reset.
+ */
+ void reset();
+
+ /**
+ * Get information about how this field is materialized.
+ *
+ * @return the field corresponding to this vector
+ */
+ Field getField();
+
+ MinorType getMinorType();
+
+ /**
+ * To transfer quota responsibility.
+ *
+ * @param allocator the target allocator
+ * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of
+ * the same type.
+ */
+ TransferPair getTransferPair(BufferAllocator allocator);
+
+ TransferPair getTransferPair(String ref, BufferAllocator allocator);
+
+ TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack);
+
+ /**
+ * Makes a new transfer pair used to transfer underlying buffers.
+ *
+ * @param target the target for the transfer
+ * @return a new {@link org.apache.arrow.vector.util.TransferPair transfer pair} that is used to transfer underlying
+ * buffers into the target vector.
+ */
+ TransferPair makeTransferPair(ValueVector target);
+
+ /**
+ * Get a reader for this vector.
+ *
+ * @return a {@link org.apache.arrow.vector.complex.reader.FieldReader field reader} that supports reading values
+ * from this vector.
+ */
+ FieldReader getReader();
+
+ /**
+ * Get the number of bytes used by this vector.
+ *
+ * @return the number of bytes that is used by this vector instance.
+ */
+ int getBufferSize();
+
+ /**
+ * Returns the number of bytes that is used by this vector if it holds the given number
+ * of values. The result will be the same as if setValueCount() were called, followed
+ * by calling getBufferSize(), but without any of the closing side-effects that setValueCount()
+ * implies wrt finishing off the population of a vector. Some operations might wish to use
+ * this to determine how much memory has been used by a vector so far, even though it is
+ * not finished being populated.
+ *
+ * @param valueCount the number of values to assume this vector contains
+ * @return the buffer size if this vector is holding valueCount values
+ */
+ int getBufferSizeFor(int valueCount);
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't impact the reference counts for
+ * this buffer so it only should be used for in-context access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted;
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance.
+ */
+ ArrowBuf[] getBuffers(boolean clear);
+
+ /**
+ * Gets the underlying buffer associated with validity vector.
+ *
+ * @return buffer
+ */
+ ArrowBuf getValidityBuffer();
+
+ /**
+ * Gets the underlying buffer associated with data vector.
+ *
+ * @return buffer
+ */
+ ArrowBuf getDataBuffer();
+
+ /**
+ * Gets the underlying buffer associated with offset vector.
+ *
+ * @return buffer
+ */
+ ArrowBuf getOffsetBuffer();
+
+ /**
+ * Gets the number of values.
+ *
+ * @return number of values in the vector
+ */
+ int getValueCount();
+
+ /**
+ * Set number of values in the vector.
+ */
+ void setValueCount(int valueCount);
+
+ /**
+ * Get friendly type object from the vector.
+ *
+ * @param index index of object to get
+ * @return friendly type object
+ */
+ Object getObject(int index);
+
+ /**
+ * Returns number of null elements in the vector.
+ *
+ * @return number of null elements
+ */
+ int getNullCount();
+
+ /**
+ * Check whether an element in the vector is null.
+ *
+ * @param index index to check for null
+ * @return true if element is null
+ */
+ boolean isNull(int index);
+
+ /**
+ * Returns hashCode of element in index with the default hasher.
+ */
+ int hashCode(int index);
+
+ /**
+ * Returns hashCode of element in index with the given hasher.
+ */
+ int hashCode(int index, ArrowBufHasher hasher);
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ void copyFrom(int fromIndex, int thisIndex, ValueVector from);
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ void copyFromSafe(int fromIndex, int thisIndex, ValueVector from);
+
+ /**
+ * Accept a generic {@link VectorVisitor} and return the result.
+ * @param <OUT> the output result type.
+ * @param <IN> the input data together with visitor.
+ */
+ <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value);
+
+ /**
+ * Gets the name of the vector.
+ * @return the name of the vector.
+ */
+ String getName();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
new file mode 100644
index 000000000..798d30fe4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.VarBinaryReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableVarBinaryHolder;
+import org.apache.arrow.vector.holders.VarBinaryHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * VarBinaryVector implements a variable width vector of binary
+ * values which could be NULL. A validity buffer (bit vector) is maintained
+ * to track which elements in the vector are null.
+ */
+public final class VarBinaryVector extends BaseVariableWidthVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a VarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public VarBinaryVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.VARBINARY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a VarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public VarBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a VarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public VarBinaryVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new VarBinaryReaderImpl(VarBinaryVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.VARBINARY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the variable length element at specified index as byte array.
+ *
+ * @param index position of element to get
+ * @return array of bytes for non-null element, null otherwise
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ final int startOffset = getStartOffset(index);
+ final int dataLength =
+ offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - startOffset;
+ final byte[] result = new byte[dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return byte array for non-null element, null otherwise
+ */
+ public byte[] getObject(int index) {
+ return get(index);
+ }
+
+ /**
+ * Get the variable length element at specified index and sets the state
+ * in provided holder.
+ *
+ * @param index position of element to get
+ * @param holder data holder to be populated by this function
+ */
+ public void get(int index, NullableVarBinaryHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.start = getStartOffset(index);
+ holder.end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ holder.buffer = valueBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, VarBinaryHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int dataLength = holder.end - holder.start;
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, VarBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, VarBinaryHolder holder) {
+ assert index >= 0;
+ final int dataLength = holder.end - holder.start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableVarBinaryHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ final int startOffset = getStartOffset(index);
+ if (holder.isSet != 0) {
+ final int dataLength = holder.end - holder.start;
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset);
+ }
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, NullableVarBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableVarBinaryHolder holder) {
+ assert index >= 0;
+ if (holder.isSet != 0) {
+ final int dataLength = holder.end - holder.start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ fillEmpties(index + 1);
+ }
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ lastSet = index;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((VarBinaryVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ VarBinaryVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new VarBinaryVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(VarBinaryVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public VarBinaryVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, VarBinaryVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
new file mode 100644
index 000000000..e725e2d28
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
@@ -0,0 +1,331 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.VarCharReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableVarCharHolder;
+import org.apache.arrow.vector.holders.VarCharHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * VarCharVector implements a variable width vector of VARCHAR
+ * values which could be NULL. A validity buffer (bit vector) is maintained
+ * to track which elements in the vector are null.
+ */
+public final class VarCharVector extends BaseVariableWidthVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a VarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public VarCharVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.VARCHAR.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a VarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public VarCharVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a VarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public VarCharVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new VarCharReaderImpl(VarCharVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.VARCHAR;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the variable length element at specified index as byte array.
+ *
+ * @param index position of element to get
+ * @return array of bytes for non-null element, null otherwise
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ final int startOffset = getStartOffset(index);
+ final int dataLength =
+ offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - startOffset;
+ final byte[] result = new byte[dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return Text object for non-null element, null otherwise
+ */
+ public Text getObject(int index) {
+ byte[] b = get(index);
+ if (b == null) {
+ return null;
+ } else {
+ return new Text(b);
+ }
+ }
+
+ /**
+ * Get the variable length element at specified index and sets the state
+ * in provided holder.
+ *
+ * @param index position of element to get
+ * @param holder data holder to be populated by this function
+ */
+ public void get(int index, NullableVarCharHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.start = getStartOffset(index);
+ holder.end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ holder.buffer = valueBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, VarCharHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int dataLength = holder.end - holder.start;
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, VarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, VarCharHolder holder) {
+ assert index >= 0;
+ final int dataLength = holder.end - holder.start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableVarCharHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ final int startOffset = getStartOffset(index);
+ if (holder.isSet != 0) {
+ final int dataLength = holder.end - holder.start;
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset);
+ }
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, NullableVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableVarCharHolder holder) {
+ assert index >= 0;
+ if (holder.isSet != 0) {
+ final int dataLength = holder.end - holder.start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ fillEmpties(index + 1);
+ }
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the
+ * content in supplied Text.
+ *
+ * @param index position of the element to set
+ * @param text Text object with data
+ */
+ public void set(int index, Text text) {
+ set(index, text.getBytes(), 0, text.getLength());
+ }
+
+ /**
+ * Same as {@link #set(int, NullableVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set.
+ * @param text Text object with data
+ */
+ public void setSafe(int index, Text text) {
+ setSafe(index, text.getBytes(), 0, text.getLength());
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((VarCharVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ VarCharVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new VarCharVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(VarCharVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public VarCharVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, VarCharVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java
new file mode 100644
index 000000000..f6b8364e3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface vectors that contain variable width members (e.g. Strings, Lists, etc).
+ */
+public interface VariableWidthVector extends ElementAddressableVector, DensityAwareVector {
+
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ *
+ * @param totalBytes Desired size of the underlying data buffer.
+ * @param valueCount Number of values in the vector.
+ */
+ void allocateNew(long totalBytes, int valueCount);
+
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ * The initial size in bytes is either default (or) reused from previous allocation
+ *
+ * @param valueCount Number of values in the vector.
+ */
+ void allocateNew(int valueCount);
+
+ /**
+ * Provide the maximum amount of variable width bytes that can be stored in this vector.
+ *
+ * @return the byte capacity of this vector
+ */
+ int getByteCapacity();
+
+ /**
+ * Provide the number of bytes contained in the valueBuffer.
+ * @return the number of bytes in valueBuffer.
+ */
+ int sizeOfValueBuffer();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java
new file mode 100644
index 000000000..39804ee41
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface for setting a specific index values as defined/valid on a vector.
+ */
+public interface VectorDefinitionSetter {
+
+ void setIndexDefined(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
new file mode 100644
index 000000000..ed5f3aef1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.util.Preconditions.checkArgument;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Loads buffers into vectors.
+ */
+public class VectorLoader {
+
+ private final VectorSchemaRoot root;
+
+ private final CompressionCodec.Factory factory;
+
+ /**
+ * A flag indicating if decompression is needed.
+ * This will affect the behavior of releasing buffers.
+ */
+ private boolean decompressionNeeded;
+
+ /**
+ * Construct with a root to load and will create children in root based on schema.
+ *
+ * @param root the root to add vectors to based on schema
+ */
+ public VectorLoader(VectorSchemaRoot root) {
+ this(root, NoCompressionCodec.Factory.INSTANCE);
+ }
+
+ /**
+ * Construct with a root to load and will create children in root based on schema.
+ *
+ * @param root the root to add vectors to based on schema.
+ * @param factory the factory to create codec.
+ */
+ public VectorLoader(VectorSchemaRoot root, CompressionCodec.Factory factory) {
+ this.root = root;
+ this.factory = factory;
+ }
+
+ /**
+ * Loads the record batch in the vectors.
+ * will not close the record batch
+ *
+ * @param recordBatch the batch to load
+ */
+ public void load(ArrowRecordBatch recordBatch) {
+ Iterator<ArrowBuf> buffers = recordBatch.getBuffers().iterator();
+ Iterator<ArrowFieldNode> nodes = recordBatch.getNodes().iterator();
+ CompressionUtil.CodecType codecType =
+ CompressionUtil.CodecType.fromCompressionType(recordBatch.getBodyCompression().getCodec());
+ decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION;
+ CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE;
+ for (FieldVector fieldVector : root.getFieldVectors()) {
+ loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec);
+ }
+ root.setRowCount(recordBatch.getLength());
+ if (nodes.hasNext() || buffers.hasNext()) {
+ throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " +
+ Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers));
+ }
+ }
+
+ private void loadBuffers(
+ FieldVector vector,
+ Field field,
+ Iterator<ArrowBuf> buffers,
+ Iterator<ArrowFieldNode> nodes,
+ CompressionCodec codec) {
+ checkArgument(nodes.hasNext(), "no more field nodes for for field %s and vector %s", field, vector);
+ ArrowFieldNode fieldNode = nodes.next();
+ int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType());
+ List<ArrowBuf> ownBuffers = new ArrayList<>(bufferLayoutCount);
+ for (int j = 0; j < bufferLayoutCount; j++) {
+ ArrowBuf nextBuf = buffers.next();
+ // for vectors without nulls, the buffer is empty, so there is no need to decompress it.
+ ArrowBuf bufferToAdd = nextBuf.writerIndex() > 0 ? codec.decompress(vector.getAllocator(), nextBuf) : nextBuf;
+ ownBuffers.add(bufferToAdd);
+ if (decompressionNeeded) {
+ // decompression performed
+ nextBuf.getReferenceManager().retain();
+ }
+ }
+ try {
+ vector.loadFieldBuffers(fieldNode, ownBuffers);
+ if (decompressionNeeded) {
+ for (ArrowBuf buf : ownBuffers) {
+ buf.close();
+ }
+ }
+ } catch (RuntimeException e) {
+ throw new IllegalArgumentException("Could not load buffers for field " +
+ field + ". error message: " + e.getMessage(), e);
+ }
+ List<Field> children = field.getChildren();
+ if (children.size() > 0) {
+ List<FieldVector> childrenFromFields = vector.getChildrenFromFields();
+ checkArgument(children.size() == childrenFromFields.size(),
+ "should have as many children as in the schema: found %s expected %s",
+ childrenFromFields.size(), children.size());
+ for (int i = 0; i < childrenFromFields.size(); i++) {
+ Field child = children.get(i);
+ FieldVector fieldVector = childrenFromFields.get(i);
+ loadBuffers(fieldVector, child, buffers, nodes, codec);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java
new file mode 100644
index 000000000..623c77317
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java
@@ -0,0 +1,429 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.ApproxEqualsVisitor;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.compare.VectorValueEqualizer;
+import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Holder for a set of vectors to be loaded/unloaded.
+ * A VectorSchemaRoot is a container that can hold batches, batches flow through VectorSchemaRoot
+ * as part of a pipeline. Note this is different from other implementations (i.e. in C++ and Python,
+ * a RecordBatch is a collection of equal-length vector instances and was created each time for a new batch).
+
+ * The recommended usage for VectorSchemaRoot is creating a single VectorSchemaRoot based on the known
+ * schema and populated data over and over into the same VectorSchemaRoot in a stream of batches rather
+ * than create a new VectorSchemaRoot instance each time (see Flight or ArrowFileWriter for better understanding).
+ * Thus at any one point a VectorSchemaRoot may have data or may have no data (say it was transferred downstream
+ * or not yet populated).
+ */
+public class VectorSchemaRoot implements AutoCloseable {
+
+ private Schema schema;
+ private int rowCount;
+ private final List<FieldVector> fieldVectors;
+ private final Map<Field, FieldVector> fieldVectorsMap = new LinkedHashMap<>();
+
+
+ /**
+ * Constructs new instance containing each of the vectors.
+ */
+ public VectorSchemaRoot(Iterable<FieldVector> vectors) {
+ this(
+ StreamSupport.stream(vectors.spliterator(), false).map(t -> t.getField()).collect(Collectors.toList()),
+ StreamSupport.stream(vectors.spliterator(), false).collect(Collectors.toList())
+ );
+ }
+
+ /**
+ * Constructs a new instance containing the children of parent but not the parent itself.
+ */
+ public VectorSchemaRoot(FieldVector parent) {
+ this(parent.getField().getChildren(), parent.getChildrenFromFields(), parent.getValueCount());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param fields The types of each vector.
+ * @param fieldVectors The data vectors (must be equal in size to <code>fields</code>.
+ */
+ public VectorSchemaRoot(List<Field> fields, List<FieldVector> fieldVectors) {
+ this(new Schema(fields), fieldVectors, fieldVectors.size() == 0 ? 0 : fieldVectors.get(0).getValueCount());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param fields The types of each vector.
+ * @param fieldVectors The data vectors (must be equal in size to <code>fields</code>.
+ * @param rowCount The number of rows contained.
+ */
+ public VectorSchemaRoot(List<Field> fields, List<FieldVector> fieldVectors, int rowCount) {
+ this(new Schema(fields), fieldVectors, rowCount);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param schema The schema for the vectors.
+ * @param fieldVectors The data vectors.
+ * @param rowCount The number of rows
+ */
+ public VectorSchemaRoot(Schema schema, List<FieldVector> fieldVectors, int rowCount) {
+ if (schema.getFields().size() != fieldVectors.size()) {
+ throw new IllegalArgumentException("Fields must match field vectors. Found " +
+ fieldVectors.size() + " vectors and " + schema.getFields().size() + " fields");
+ }
+ this.schema = schema;
+ this.rowCount = rowCount;
+ this.fieldVectors = fieldVectors;
+ for (int i = 0; i < schema.getFields().size(); ++i) {
+ Field field = schema.getFields().get(i);
+ FieldVector vector = fieldVectors.get(i);
+ fieldVectorsMap.put(field, vector);
+ }
+ }
+
+ /**
+ * Creates a new set of empty vectors corresponding to the given schema.
+ */
+ public static VectorSchemaRoot create(Schema schema, BufferAllocator allocator) {
+ List<FieldVector> fieldVectors = new ArrayList<>();
+ for (Field field : schema.getFields()) {
+ FieldVector vector = field.createVector(allocator);
+ fieldVectors.add(vector);
+ }
+ if (fieldVectors.size() != schema.getFields().size()) {
+ throw new IllegalArgumentException("The root vector did not create the right number of children. found " +
+ fieldVectors.size() + " expected " + schema.getFields().size());
+ }
+ return new VectorSchemaRoot(schema, fieldVectors, 0);
+ }
+
+ /** Constructs a new instance from vectors. */
+ public static VectorSchemaRoot of(FieldVector... vectors) {
+ return new VectorSchemaRoot(Arrays.stream(vectors).collect(Collectors.toList()));
+ }
+
+ /**
+ * Do an adaptive allocation of each vector for memory purposes. Sizes will be based on previously
+ * defined initial allocation for each vector (and subsequent size learned).
+ */
+ public void allocateNew() {
+ for (FieldVector v : fieldVectors) {
+ v.allocateNew();
+ }
+ rowCount = 0;
+ }
+
+ /**
+ * Release all the memory for each vector held in this root. This DOES NOT remove vectors from the container.
+ */
+ public void clear() {
+ for (FieldVector v : fieldVectors) {
+ v.clear();
+ }
+ rowCount = 0;
+ }
+
+ public List<FieldVector> getFieldVectors() {
+ return fieldVectors.stream().collect(Collectors.toList());
+ }
+
+ /**
+ * gets a vector by name.
+ *
+ * if name occurs multiple times this returns the first inserted entry for name
+ */
+ public FieldVector getVector(String name) {
+ for (Map.Entry<Field, FieldVector> entry: fieldVectorsMap.entrySet()) {
+ if (entry.getKey().getName().equals(name)) {
+ return entry.getValue();
+ }
+ }
+ return null;
+ }
+
+ public FieldVector getVector(Field field) {
+ return fieldVectorsMap.get(field);
+ }
+
+ public FieldVector getVector(int index) {
+ Preconditions.checkArgument(index >= 0 && index < fieldVectors.size());
+ return fieldVectors.get(index);
+ }
+
+ /**
+ * Add vector to the record batch, producing a new VectorSchemaRoot.
+ * @param index field index
+ * @param vector vector to be added.
+ * @return out VectorSchemaRoot with vector added
+ */
+ public VectorSchemaRoot addVector(int index, FieldVector vector) {
+ Preconditions.checkNotNull(vector);
+ Preconditions.checkArgument(index >= 0 && index < fieldVectors.size());
+ List<FieldVector> newVectors = new ArrayList<>();
+ for (int i = 0; i < fieldVectors.size(); i++) {
+ if (i == index) {
+ newVectors.add(vector);
+ }
+ newVectors.add(fieldVectors.get(i));
+ }
+ return new VectorSchemaRoot(newVectors);
+ }
+
+ /**
+ * Remove vector from the record batch, producing a new VectorSchemaRoot.
+ * @param index field index
+ * @return out VectorSchemaRoot with vector removed
+ */
+ public VectorSchemaRoot removeVector(int index) {
+ Preconditions.checkArgument(index >= 0 && index < fieldVectors.size());
+ List<FieldVector> newVectors = new ArrayList<>();
+ for (int i = 0; i < fieldVectors.size(); i++) {
+ if (i != index) {
+ newVectors.add(fieldVectors.get(i));
+ }
+ }
+ return new VectorSchemaRoot(newVectors);
+ }
+
+ public Schema getSchema() {
+ return schema;
+ }
+
+ public int getRowCount() {
+ return rowCount;
+ }
+
+ /**
+ * Set the row count of all the vectors in this container. Also sets the value
+ * count for each root level contained FieldVector.
+ * @param rowCount Number of records.
+ */
+ public void setRowCount(int rowCount) {
+ this.rowCount = rowCount;
+ for (FieldVector v : getFieldVectors()) {
+ v.setValueCount(rowCount);
+ }
+ }
+
+ @Override
+ public void close() {
+ try {
+ AutoCloseables.close(fieldVectors);
+ } catch (RuntimeException ex) {
+ throw ex;
+ } catch (Exception ex) {
+ // should never happen since FieldVector.close() doesn't throw IOException
+ throw new RuntimeException(ex);
+ }
+ }
+
+ private void printRow(StringBuilder sb, List<Object> row) {
+ boolean first = true;
+ for (Object v : row) {
+ if (first) {
+ first = false;
+ } else {
+ sb.append("\t");
+ }
+ sb.append(v);
+ }
+ sb.append("\n");
+ }
+
+ /**
+ * Returns a tab separated value of vectors (based on their java object representation).
+ */
+ public String contentToTSVString() {
+ StringBuilder sb = new StringBuilder();
+ List<Object> row = new ArrayList<>(schema.getFields().size());
+ for (Field field : schema.getFields()) {
+ row.add(field.getName());
+ }
+ printRow(sb, row);
+ for (int i = 0; i < rowCount; i++) {
+ row.clear();
+ for (FieldVector v : fieldVectors) {
+ row.add(v.getObject(i));
+ }
+ printRow(sb, row);
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Synchronizes the schema from the current vectors.
+ * In some cases, the schema and the actual vector structure may be different.
+ * This can be caused by a promoted writer (For details, please see
+ * {@link org.apache.arrow.vector.complex.impl.PromotableWriter}).
+ * For example, when writing different types of data to a {@link org.apache.arrow.vector.complex.ListVector}
+ * may lead to such a case.
+ * When this happens, this method should be called to bring the schema and vector structure in a synchronized state.
+ * @return true if the schema is updated, false otherwise.
+ */
+ public boolean syncSchema() {
+ List<Field> oldFields = this.schema.getFields();
+ List<Field> newFields = this.fieldVectors.stream().map(ValueVector::getField).collect(Collectors.toList());
+ if (!oldFields.equals(newFields)) {
+ this.schema = new Schema(newFields);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Slice this root from desired index.
+ * @param index start position of the slice
+ * @return the sliced root
+ */
+ public VectorSchemaRoot slice(int index) {
+ return slice(index, this.rowCount - index);
+ }
+
+ /**
+ * Slice this root at desired index and length.
+ * @param index start position of the slice
+ * @param length length of the slice
+ * @return the sliced root
+ */
+ public VectorSchemaRoot slice(int index, int length) {
+ Preconditions.checkArgument(index >= 0, "expecting non-negative index");
+ Preconditions.checkArgument(length >= 0, "expecting non-negative length");
+ Preconditions.checkArgument(index + length <= rowCount,
+ "index + length should <= rowCount");
+
+ if (index == 0 && length == rowCount) {
+ return this;
+ }
+
+ List<FieldVector> sliceVectors = fieldVectors.stream().map(v -> {
+ TransferPair transferPair = v.getTransferPair(v.getAllocator());
+ transferPair.splitAndTransfer(index, length);
+ return (FieldVector) transferPair.getTo();
+ }).collect(Collectors.toList());
+
+ return new VectorSchemaRoot(sliceVectors);
+ }
+
+ /**
+ * Determine if two VectorSchemaRoots are exactly equal.
+ */
+ public boolean equals(VectorSchemaRoot other) {
+ if (other == null) {
+ return false;
+ }
+
+ if (!this.schema.equals(other.schema)) {
+ return false;
+ }
+
+ if (this.rowCount != other.rowCount) {
+ return false;
+ }
+
+ for (int i = 0; i < fieldVectors.size(); i++) {
+ FieldVector vector = fieldVectors.get(i);
+ FieldVector otherVector = other.fieldVectors.get(i);
+ if (!VectorEqualsVisitor.vectorEquals(vector, otherVector)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * Determine if two VectorSchemaRoots are approximately equal using the given functions to
+ * calculate difference between float/double values.
+ * Note that approx equals are in regards to floating point values, other values are comparing
+ * to exactly equals.
+ *
+ * @param floatDiffFunction function to calculate difference between float values.
+ * @param doubleDiffFunction function to calculate difference between double values.
+ */
+ public boolean approxEquals(
+ VectorSchemaRoot other,
+ VectorValueEqualizer<Float4Vector> floatDiffFunction,
+ VectorValueEqualizer<Float8Vector> doubleDiffFunction) {
+
+ Preconditions.checkNotNull(floatDiffFunction);
+ Preconditions.checkNotNull(doubleDiffFunction);
+
+ if (other == null) {
+ return false;
+ }
+
+ if (!this.schema.equals(other.schema)) {
+ return false;
+ }
+
+ if (this.rowCount != other.rowCount) {
+ return false;
+ }
+
+ Range range = new Range(0, 0, 0);
+ for (int i = 0; i < fieldVectors.size(); i++) {
+ FieldVector vector = fieldVectors.get(i);
+ FieldVector otherVector = other.fieldVectors.get(i);
+ if (vector.getValueCount() != otherVector.getValueCount()) {
+ return false;
+ }
+ ApproxEqualsVisitor visitor =
+ new ApproxEqualsVisitor(vector, otherVector, floatDiffFunction, doubleDiffFunction);
+ range.setLength(vector.getValueCount());
+ if (!visitor.rangeEquals(range)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * Determine if two VectorSchemaRoots are approximately equal using default functions to
+ * calculate difference between float/double values.
+ */
+ public boolean approxEquals(VectorSchemaRoot other) {
+ VectorValueEqualizer<Float4Vector> floatDiffFunction =
+ new ValueEpsilonEqualizers.Float4EpsilonEqualizer(ApproxEqualsVisitor.DEFAULT_FLOAT_EPSILON);
+ VectorValueEqualizer<Float8Vector> doubleDiffFunction =
+ new ValueEpsilonEqualizers.Float8EpsilonEqualizer(ApproxEqualsVisitor.DEFAULT_DOUBLE_EPSILON);
+ return approxEquals(other, floatDiffFunction, doubleDiffFunction);
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java
new file mode 100644
index 000000000..e2cbf3ec1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+
+/**
+ * Helper class that handles converting a {@link VectorSchemaRoot}
+ * to a {@link ArrowRecordBatch}.
+ */
+public class VectorUnloader {
+
+ private final VectorSchemaRoot root;
+ private final boolean includeNullCount;
+ private final CompressionCodec codec;
+ private final boolean alignBuffers;
+
+ /**
+ * Constructs a new instance of the given set of vectors.
+ */
+ public VectorUnloader(VectorSchemaRoot root) {
+ this(root, true, NoCompressionCodec.INSTANCE, true);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param root The set of vectors to serialize to an {@link ArrowRecordBatch}.
+ * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch}
+ * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries.
+ */
+ public VectorUnloader(
+ VectorSchemaRoot root, boolean includeNullCount, boolean alignBuffers) {
+ this(root, includeNullCount, NoCompressionCodec.INSTANCE, alignBuffers);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param root The set of vectors to serialize to an {@link ArrowRecordBatch}.
+ * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch}
+ * @param codec the codec for compressing data. If it is null, then no compression is needed.
+ * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries.
+ */
+ public VectorUnloader(
+ VectorSchemaRoot root, boolean includeNullCount, CompressionCodec codec, boolean alignBuffers) {
+ this.root = root;
+ this.includeNullCount = includeNullCount;
+ this.codec = codec;
+ this.alignBuffers = alignBuffers;
+ }
+
+ /**
+ * Performs the depth first traversal of the Vectors to create an {@link ArrowRecordBatch} suitable
+ * for serialization.
+ */
+ public ArrowRecordBatch getRecordBatch() {
+ List<ArrowFieldNode> nodes = new ArrayList<>();
+ List<ArrowBuf> buffers = new ArrayList<>();
+ for (FieldVector vector : root.getFieldVectors()) {
+ appendNodes(vector, nodes, buffers);
+ }
+ return new ArrowRecordBatch(
+ root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), alignBuffers);
+ }
+
+ private void appendNodes(FieldVector vector, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
+ nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1));
+ List<ArrowBuf> fieldBuffers = vector.getFieldBuffers();
+ int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType());
+ if (fieldBuffers.size() != expectedBufferCount) {
+ throw new IllegalArgumentException(String.format(
+ "wrong number of buffers for field %s in vector %s. found: %s",
+ vector.getField(), vector.getClass().getSimpleName(), fieldBuffers));
+ }
+ for (ArrowBuf buf : fieldBuffers) {
+ buffers.add(codec.compress(vector.getAllocator(), buf));
+ }
+ for (FieldVector child : vector.getChildrenFromFields()) {
+ appendNodes(child, nodes, buffers);
+ }
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
new file mode 100644
index 000000000..079b5c103
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A zero length vector of any type.
+ */
+public final class ZeroVector extends NullVector {
+ public static final ZeroVector INSTANCE = new ZeroVector();
+
+ /**
+ * Instantiate a ZeroVector.
+ *
+ * @param name name of the vector
+ */
+ public ZeroVector(String name) {
+ super(name);
+ }
+
+ /**
+ * Instantiate a ZeroVector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector.
+ */
+ public ZeroVector(String name, FieldType fieldType) {
+ super(name, fieldType);
+ }
+
+ /**
+ * Instantiate a ZeroVector.
+ *
+ * @param field field materialized by this vector.
+ */
+ public ZeroVector(Field field) {
+ super(field);
+ }
+
+ @Deprecated
+ public ZeroVector() {
+ }
+
+ @Override
+ public int getValueCount() {
+ return 0;
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ }
+
+ @Override
+ public int getNullCount() {
+ return 0;
+ }
+
+ @Override
+ public boolean isNull(int index) {
+ throw new IndexOutOfBoundsException();
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return 0;
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return 0;
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return defaultPair;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return defaultPair;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return defaultPair;
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return defaultPair;
+ }
+
+ private final TransferPair defaultPair = new TransferPair() {
+ @Override
+ public void transfer() {
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return ZeroVector.this;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ }
+ };
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java
new file mode 100644
index 000000000..bcf8c64e0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import java.util.function.BiFunction;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers;
+
+/**
+ * Visitor to compare floating point vectors approximately.
+ */
+public class ApproxEqualsVisitor extends RangeEqualsVisitor {
+
+ /**
+ * Functions to calculate difference between float/double values.
+ */
+ private final VectorValueEqualizer<Float4Vector> floatDiffFunction;
+ private final VectorValueEqualizer<Float8Vector> doubleDiffFunction;
+
+ /**
+ * Default epsilons for diff functions.
+ */
+ public static final float DEFAULT_FLOAT_EPSILON = 1.0E-6f;
+ public static final double DEFAULT_DOUBLE_EPSILON = 1.0E-6;
+
+ /**
+ * Constructs a new instance with default tolerances.
+ * @param left left vector
+ * @param right right vector
+ */
+ public ApproxEqualsVisitor(ValueVector left, ValueVector right) {
+ this (left, right, DEFAULT_FLOAT_EPSILON, DEFAULT_DOUBLE_EPSILON);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param left left vector
+ * @param right right vector
+ * @param floatEpsilon difference for float values
+ * @param doubleEpsilon difference for double values
+ */
+ public ApproxEqualsVisitor(ValueVector left, ValueVector right, float floatEpsilon, double doubleEpsilon) {
+ this (left, right,
+ new ValueEpsilonEqualizers.Float4EpsilonEqualizer(floatEpsilon),
+ new ValueEpsilonEqualizers.Float8EpsilonEqualizer(doubleEpsilon));
+ }
+
+ /**
+ * Constructs a new instance.
+ */
+ public ApproxEqualsVisitor(ValueVector left, ValueVector right,
+ VectorValueEqualizer<Float4Vector> floatDiffFunction,
+ VectorValueEqualizer<Float8Vector> doubleDiffFunction) {
+ this (left, right, floatDiffFunction, doubleDiffFunction, DEFAULT_TYPE_COMPARATOR);
+ }
+
+ /**
+ * Constructs a new instance.
+ * @param left the left vector.
+ * @param right the right vector.
+ * @param floatDiffFunction the equalizer for float values.
+ * @param doubleDiffFunction the equalizer for double values.
+ * @param typeComparator type comparator to compare vector type.
+ */
+ public ApproxEqualsVisitor(ValueVector left, ValueVector right,
+ VectorValueEqualizer<Float4Vector> floatDiffFunction,
+ VectorValueEqualizer<Float8Vector> doubleDiffFunction,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+ super(left, right, typeComparator);
+ this.floatDiffFunction = floatDiffFunction;
+ this.doubleDiffFunction = doubleDiffFunction;
+ }
+
+ @Override
+ public Boolean visit(BaseFixedWidthVector left, Range range) {
+ if (left instanceof Float4Vector) {
+ if (!validate(left)) {
+ return false;
+ }
+ return float4ApproxEquals(range);
+ } else if (left instanceof Float8Vector) {
+ if (!validate(left)) {
+ return false;
+ }
+ return float8ApproxEquals(range);
+ } else {
+ return super.visit(left, range);
+ }
+ }
+
+ @Override
+ protected ApproxEqualsVisitor createInnerVisitor(
+ ValueVector left, ValueVector right,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+ return new ApproxEqualsVisitor(left, right, floatDiffFunction.clone(), doubleDiffFunction.clone(), typeComparator);
+ }
+
+ private boolean float4ApproxEquals(Range range) {
+ Float4Vector leftVector = (Float4Vector) getLeft();
+ Float4Vector rightVector = (Float4Vector) getRight();
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ if (!floatDiffFunction.valuesEqual(leftVector, leftIndex, rightVector, rightIndex)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean float8ApproxEquals(Range range) {
+ Float8Vector leftVector = (Float8Vector) getLeft();
+ Float8Vector rightVector = (Float8Vector) getRight();
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ if (!doubleDiffFunction.valuesEqual(leftVector, leftIndex, rightVector, rightIndex)) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java
new file mode 100644
index 000000000..0de99ab01
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+/**
+ * Wrapper for the parameters of comparing a range of values in two vectors.
+ */
+public class Range {
+
+ /**
+ * Start position in the left vector.
+ */
+ private int leftStart = -1;
+
+ /**
+ * Start position in the right vector.
+ */
+ private int rightStart = -1;
+
+ /**
+ * Length of the range.
+ */
+ private int length = -1;
+
+
+ /**
+ * Constructs a new instance.
+ */
+ public Range() {}
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param leftStart start index in left vector
+ * @param rightStart start index in right vector
+ * @param length length of range
+ */
+ public Range(int leftStart, int rightStart, int length) {
+ this.leftStart = leftStart;
+ this.rightStart = rightStart;
+ this.length = length;
+ }
+
+ public int getLeftStart() {
+ return leftStart;
+ }
+
+ public int getRightStart() {
+ return rightStart;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ public Range setLeftStart(int leftStart) {
+ this.leftStart = leftStart;
+ return this;
+ }
+
+ public Range setRightStart(int rightStart) {
+ this.rightStart = rightStart;
+ return this;
+ }
+
+ public Range setLength(int length) {
+ this.length = length;
+ return this;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java
new file mode 100644
index 000000000..35b4936e3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java
@@ -0,0 +1,563 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import java.util.List;
+import java.util.function.BiFunction;
+
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+
+/**
+ * Visitor to compare a range of values for vectors.
+ */
+public class RangeEqualsVisitor implements VectorVisitor<Boolean, Range> {
+ private ValueVector left;
+ private ValueVector right;
+
+ private BiFunction<ValueVector, ValueVector, Boolean> typeComparator;
+ private boolean typeCompareResult;
+
+ /**
+ * Default type comparator.
+ */
+ public static final BiFunction<ValueVector, ValueVector, Boolean> DEFAULT_TYPE_COMPARATOR =
+ (v1, v2) -> new TypeEqualsVisitor(v2).equals(v1);
+
+ /**
+ * Constructs a new instance with default type comparator.
+ * @param left left vector
+ * @param right right vector
+ */
+ public RangeEqualsVisitor(ValueVector left, ValueVector right) {
+ this (left, right, DEFAULT_TYPE_COMPARATOR);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param left left vector
+ * @param right right vector
+ * @param typeComparator type comparator to compare vector type.
+ */
+ public RangeEqualsVisitor(
+ ValueVector left,
+ ValueVector right,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+ this.left = left;
+ this.right = right;
+ this.typeComparator = typeComparator;
+
+ Preconditions.checkArgument(left != null,
+ "left vector cannot be null");
+ Preconditions.checkArgument(right != null,
+ "right vector cannot be null");
+
+ // type usually checks only once unless the left vector is changed.
+ checkType();
+ }
+
+ private void checkType() {
+ if (typeComparator == null || left == right) {
+ typeCompareResult = true;
+ } else {
+ typeCompareResult = typeComparator.apply(left, right);
+ }
+ }
+
+ /**
+ * Validate the passed left vector, if it is changed, reset and check type.
+ */
+ protected boolean validate(ValueVector left) {
+ if (left != this.left) {
+ this.left = left;
+ checkType();
+ }
+ return typeCompareResult;
+ }
+
+ /**
+ * Check range equals.
+ */
+ public boolean rangeEquals(Range range) {
+ if (!typeCompareResult) {
+ return false;
+ }
+
+ Preconditions.checkArgument(range.getLeftStart() >= 0,
+ "leftStart %s must be non negative.", range.getLeftStart());
+ Preconditions.checkArgument(range.getRightStart() >= 0,
+ "rightStart %s must be non negative.", range.getRightStart());
+
+ Preconditions.checkArgument(range.getRightStart() + range.getLength() <= right.getValueCount(),
+ "(rightStart + length) %s out of range[0, %s].", 0, right.getValueCount());
+ Preconditions.checkArgument(range.getLeftStart() + range.getLength() <= left.getValueCount(),
+ "(leftStart + length) %s out of range[0, %s].", 0, left.getValueCount());
+
+ return left.accept(this, range);
+ }
+
+ public ValueVector getLeft() {
+ return left;
+ }
+
+ public ValueVector getRight() {
+ return right;
+ }
+
+ @Override
+ public Boolean visit(BaseFixedWidthVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareBaseFixedWidthVectors(range);
+ }
+
+ @Override
+ public Boolean visit(BaseVariableWidthVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareBaseVariableWidthVectors(range);
+ }
+
+ @Override
+ public Boolean visit(BaseLargeVariableWidthVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareBaseLargeVariableWidthVectors(range);
+ }
+
+ @Override
+ public Boolean visit(ListVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareListVectors(range);
+ }
+
+ @Override
+ public Boolean visit(FixedSizeListVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareFixedSizeListVectors(range);
+ }
+
+ @Override
+ public Boolean visit(LargeListVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareLargeListVectors(range);
+ }
+
+ @Override
+ public Boolean visit(NonNullableStructVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareStructVectors(range);
+ }
+
+ @Override
+ public Boolean visit(UnionVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareUnionVectors(range);
+ }
+
+ @Override
+ public Boolean visit(DenseUnionVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareDenseUnionVectors(range);
+ }
+
+ @Override
+ public Boolean visit(NullVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public Boolean visit(ExtensionTypeVector<?> left, Range range) {
+ if (!(right instanceof ExtensionTypeVector<?>) || !validate(left)) {
+ return false;
+ }
+ ValueVector rightUnderlying = ((ExtensionTypeVector<?>) right).getUnderlyingVector();
+ TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightUnderlying);
+ RangeEqualsVisitor underlyingVisitor =
+ createInnerVisitor(left.getUnderlyingVector(), rightUnderlying, (l, r) -> typeVisitor.equals(l));
+ return underlyingVisitor.rangeEquals(range);
+ }
+
+ protected RangeEqualsVisitor createInnerVisitor(
+ ValueVector leftInner, ValueVector rightInner,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+ return new RangeEqualsVisitor(leftInner, rightInner, typeComparator);
+ }
+
+ protected boolean compareUnionVectors(Range range) {
+ UnionVector leftVector = (UnionVector) left;
+ UnionVector rightVector = (UnionVector) right;
+
+ Range subRange = new Range(0, 0, 1);
+ for (int i = 0; i < range.getLength(); i++) {
+ subRange.setLeftStart(range.getLeftStart() + i).setRightStart(range.getRightStart() + i);
+ ValueVector leftSubVector = leftVector.getVector(range.getLeftStart() + i);
+ ValueVector rightSubVector = rightVector.getVector(range.getRightStart() + i);
+
+ if (leftSubVector == null || rightSubVector == null) {
+ if (leftSubVector == rightSubVector) {
+ continue;
+ } else {
+ return false;
+ }
+ }
+ TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightSubVector);
+ RangeEqualsVisitor visitor =
+ createInnerVisitor(leftSubVector, rightSubVector, (left, right) -> typeVisitor.equals(left));
+ if (!visitor.rangeEquals(subRange)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareDenseUnionVectors(Range range) {
+ DenseUnionVector leftVector = (DenseUnionVector) left;
+ DenseUnionVector rightVector = (DenseUnionVector) right;
+
+ Range subRange = new Range(0, 0, 1);
+ for (int i = 0; i < range.getLength(); i++) {
+ boolean isLeftNull = leftVector.isNull(range.getLeftStart() + i);
+ boolean isRightNull = rightVector.isNull(range.getRightStart() + i);
+
+ // compare nullabilities
+ if (isLeftNull || isRightNull) {
+ if (isLeftNull != isRightNull) {
+ // exactly one slot is null, unequal
+ return false;
+ } else {
+ // both slots are null, pass this iteration
+ continue;
+ }
+ }
+
+ // compare type ids
+ byte leftTypeId = leftVector.getTypeId(range.getLeftStart() + i);
+ byte rightTypeId = rightVector.getTypeId(range.getRightStart() + i);
+
+ if (leftTypeId != rightTypeId) {
+ return false;
+ }
+
+ ValueVector leftSubVector = leftVector.getVectorByType(leftTypeId);
+ ValueVector rightSubVector = rightVector.getVectorByType(rightTypeId);
+
+ if (leftSubVector == null || rightSubVector == null) {
+ if (leftSubVector != rightSubVector) {
+ // exactly one of the sub-vectors is null, unequal
+ return false;
+ } else {
+ // both sub-vectors are null, pass this iteration
+ continue;
+ }
+ }
+
+ // compare values
+ int leftOffset = leftVector.getOffset(range.getLeftStart() + i);
+ int rightOffset = rightVector.getOffset(range.getRightStart() + i);
+ subRange.setLeftStart(leftOffset).setRightStart(rightOffset);
+ TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightSubVector);
+ RangeEqualsVisitor visitor =
+ createInnerVisitor(leftSubVector, rightSubVector, (left, right) -> typeVisitor.equals(left));
+ if (!visitor.rangeEquals(subRange)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareStructVectors(Range range) {
+ NonNullableStructVector leftVector = (NonNullableStructVector) left;
+ NonNullableStructVector rightVector = (NonNullableStructVector) right;
+
+ List<String> leftChildNames = leftVector.getChildFieldNames();
+ if (!leftChildNames.equals(rightVector.getChildFieldNames())) {
+ return false;
+ }
+
+ for (String name : leftChildNames) {
+ RangeEqualsVisitor visitor =
+ createInnerVisitor(leftVector.getChild(name), rightVector.getChild(name), /*type comparator*/ null);
+ if (!visitor.rangeEquals(range)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ protected boolean compareBaseFixedWidthVectors(Range range) {
+ BaseFixedWidthVector leftVector = (BaseFixedWidthVector) left;
+ BaseFixedWidthVector rightVector = (BaseFixedWidthVector) right;
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ int typeWidth = leftVector.getTypeWidth();
+ if (!isNull) {
+ int startIndexLeft = typeWidth * leftIndex;
+ int endIndexLeft = typeWidth * (leftIndex + 1);
+
+ int startIndexRight = typeWidth * rightIndex;
+ int endIndexRight = typeWidth * (rightIndex + 1);
+
+ int ret = ByteFunctionHelpers.equal(leftVector.getDataBuffer(), startIndexLeft, endIndexLeft,
+ rightVector.getDataBuffer(), startIndexRight, endIndexRight);
+
+ if (ret == 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareBaseVariableWidthVectors(Range range) {
+ BaseVariableWidthVector leftVector = (BaseVariableWidthVector) left;
+ BaseVariableWidthVector rightVector = (BaseVariableWidthVector) right;
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ int offsetWidth = BaseVariableWidthVector.OFFSET_WIDTH;
+
+ if (!isNull) {
+ final int startIndexLeft = leftVector.getOffsetBuffer().getInt(leftIndex * offsetWidth);
+ final int endIndexLeft = leftVector.getOffsetBuffer().getInt((leftIndex + 1) * offsetWidth);
+
+ final int startIndexRight = rightVector.getOffsetBuffer().getInt(rightIndex * offsetWidth);
+ final int endIndexRight = rightVector.getOffsetBuffer().getInt((rightIndex + 1) * offsetWidth);
+
+ int ret = ByteFunctionHelpers.equal(leftVector.getDataBuffer(), startIndexLeft, endIndexLeft,
+ rightVector.getDataBuffer(), startIndexRight, endIndexRight);
+
+ if (ret == 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareBaseLargeVariableWidthVectors(Range range) {
+ BaseLargeVariableWidthVector leftVector = (BaseLargeVariableWidthVector) left;
+ BaseLargeVariableWidthVector rightVector = (BaseLargeVariableWidthVector) right;
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ int offsetWidth = BaseLargeVariableWidthVector.OFFSET_WIDTH;
+
+ if (!isNull) {
+ final long startIndexLeft = leftVector.getOffsetBuffer().getLong((long) leftIndex * offsetWidth);
+ final long endIndexLeft = leftVector.getOffsetBuffer().getLong((long) (leftIndex + 1) * offsetWidth);
+
+ final long startIndexRight = rightVector.getOffsetBuffer().getLong((long) rightIndex * offsetWidth);
+ final long endIndexRight = rightVector.getOffsetBuffer().getLong((long) (rightIndex + 1) * offsetWidth);
+
+ int ret = ByteFunctionHelpers.equal(leftVector.getDataBuffer(), startIndexLeft, endIndexLeft,
+ rightVector.getDataBuffer(), startIndexRight, endIndexRight);
+
+ if (ret == 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareListVectors(Range range) {
+ ListVector leftVector = (ListVector) left;
+ ListVector rightVector = (ListVector) right;
+
+ RangeEqualsVisitor innerVisitor =
+ createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null);
+ Range innerRange = new Range();
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ int offsetWidth = BaseRepeatedValueVector.OFFSET_WIDTH;
+
+ if (!isNull) {
+ final int startIndexLeft = leftVector.getOffsetBuffer().getInt(leftIndex * offsetWidth);
+ final int endIndexLeft = leftVector.getOffsetBuffer().getInt((leftIndex + 1) * offsetWidth);
+
+ final int startIndexRight = rightVector.getOffsetBuffer().getInt(rightIndex * offsetWidth);
+ final int endIndexRight = rightVector.getOffsetBuffer().getInt((rightIndex + 1) * offsetWidth);
+
+ if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) {
+ return false;
+ }
+
+ innerRange = innerRange
+ .setRightStart(startIndexRight)
+ .setLeftStart(startIndexLeft)
+ .setLength(endIndexLeft - startIndexLeft);
+ if (!innerVisitor.rangeEquals(innerRange)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareFixedSizeListVectors(Range range) {
+ FixedSizeListVector leftVector = (FixedSizeListVector) left;
+ FixedSizeListVector rightVector = (FixedSizeListVector) right;
+
+ if (leftVector.getListSize() != rightVector.getListSize()) {
+ return false;
+ }
+
+ int listSize = leftVector.getListSize();
+ RangeEqualsVisitor innerVisitor =
+ createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null);
+ Range innerRange = new Range(0, 0, listSize);
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ if (!isNull) {
+ final int startIndexLeft = leftIndex * listSize;
+ final int endIndexLeft = (leftIndex + 1) * listSize;
+
+ final int startIndexRight = rightIndex * listSize;
+ final int endIndexRight = (rightIndex + 1) * listSize;
+
+ if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) {
+ return false;
+ }
+
+ innerRange = innerRange.setLeftStart(startIndexLeft)
+ .setRightStart(startIndexRight);
+ if (!innerVisitor.rangeEquals(innerRange)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareLargeListVectors(Range range) {
+ LargeListVector leftVector = (LargeListVector) left;
+ LargeListVector rightVector = (LargeListVector) right;
+
+ RangeEqualsVisitor innerVisitor =
+ createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null);
+ Range innerRange = new Range();
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ long offsetWidth = LargeListVector.OFFSET_WIDTH;
+
+ if (!isNull) {
+ final long startIndexLeft = leftVector.getOffsetBuffer().getLong((long) leftIndex * offsetWidth);
+ final long endIndexLeft = leftVector.getOffsetBuffer().getLong((long) (leftIndex + 1) * offsetWidth);
+
+ final long startIndexRight = rightVector.getOffsetBuffer().getLong((long) rightIndex * offsetWidth);
+ final long endIndexRight = rightVector.getOffsetBuffer().getLong((long) (rightIndex + 1) * offsetWidth);
+
+ if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) {
+ return false;
+ }
+
+ innerRange = innerRange // TODO revisit these casts when long indexing is finished
+ .setRightStart(checkedCastToInt(startIndexRight))
+ .setLeftStart(checkedCastToInt(startIndexLeft))
+ .setLength(checkedCastToInt(endIndexLeft - startIndexLeft));
+ if (!innerVisitor.rangeEquals(innerRange)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java
new file mode 100644
index 000000000..443ee1f96
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Visitor to compare type equals for vectors.
+ */
+public class TypeEqualsVisitor implements VectorVisitor<Boolean, Void> {
+
+ private final ValueVector right;
+
+ private final boolean checkName;
+ private final boolean checkMetadata;
+
+ /**
+ * Construct an instance.
+ */
+ public TypeEqualsVisitor(ValueVector right) {
+ this (right, true, true);
+ }
+
+ /**
+ * Construct an instance.
+ * @param right right vector
+ * @param checkName whether checks names
+ * @param checkMetadata whether checks metadata
+ */
+ public TypeEqualsVisitor(ValueVector right, boolean checkName, boolean checkMetadata) {
+ this.right = right;
+ this.checkName = checkName;
+ this.checkMetadata = checkMetadata;
+ }
+
+ /**
+ * Check type equals without passing IN param in VectorVisitor.
+ */
+ public boolean equals(ValueVector left) {
+ return left.accept(this, null);
+ }
+
+ @Override
+ public Boolean visit(BaseFixedWidthVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(BaseVariableWidthVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(BaseLargeVariableWidthVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(ListVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(FixedSizeListVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(LargeListVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(NonNullableStructVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(UnionVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(DenseUnionVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(NullVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(ExtensionTypeVector<?> left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ private boolean compareField(Field leftField, Field rightField) {
+
+ if (leftField == rightField) {
+ return true;
+ }
+
+ return (!checkName || Objects.equals(leftField.getName(), rightField.getName())) &&
+ Objects.equals(leftField.isNullable(), rightField.isNullable()) &&
+ Objects.equals(leftField.getType(), rightField.getType()) &&
+ Objects.equals(leftField.getDictionary(), rightField.getDictionary()) &&
+ (!checkMetadata || Objects.equals(leftField.getMetadata(), rightField.getMetadata())) &&
+ compareChildren(leftField.getChildren(), rightField.getChildren());
+ }
+
+ private boolean compareChildren(List<Field> leftChildren, List<Field> rightChildren) {
+ if (leftChildren.size() != rightChildren.size()) {
+ return false;
+ }
+
+ for (int i = 0; i < leftChildren.size(); i++) {
+ if (!compareField(leftChildren.get(i), rightChildren.get(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java
new file mode 100644
index 000000000..390d13854
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import static org.apache.arrow.vector.compare.RangeEqualsVisitor.DEFAULT_TYPE_COMPARATOR;
+
+import java.util.function.BiFunction;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Visitor to compare vectors equal.
+ */
+public class VectorEqualsVisitor {
+
+ /**
+ * Checks if two vectors are equals with default type comparator.
+ * @param left the left vector to compare.
+ * @param right the right vector to compare.
+ * @return true if the vectors are equal, and false otherwise.
+ */
+ public static boolean vectorEquals(ValueVector left, ValueVector right) {
+ return vectorEquals(left, right, DEFAULT_TYPE_COMPARATOR);
+ }
+
+ /**
+ * Checks if two vectors are equals.
+ * @param left the left vector to compare.
+ * @param right the right vector to compare.
+ * @param typeComparator type comparator to compare vector type.
+ * @return true if the vectors are equal, and false otherwise.
+ */
+ public static boolean vectorEquals(
+ ValueVector left,
+ ValueVector right,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+
+ if (left.getValueCount() != right.getValueCount()) {
+ return false;
+ }
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(left, right, typeComparator);
+ return visitor.rangeEquals(new Range(0, 0, left.getValueCount()));
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java
new file mode 100644
index 000000000..4f9c1a95e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * A function to determine if two vectors are equal at specified positions.
+ * @param <V> the vector type.
+ */
+public interface VectorValueEqualizer<V extends ValueVector> extends Cloneable {
+
+ /**
+ * Checks if the vectors are equal at the given positions, given that the values
+ * at both positions are non-null.
+ * @param vector1 the first vector.
+ * @param index1 index in the first vector.
+ * @param vector2 the second vector.
+ * @param index2 index in the second vector.
+ * @return true if the two values are considered to be equal, and false otherwise.
+ */
+ boolean valuesEqual(V vector1, int index1, V vector2, int index2);
+
+ /**
+ * Creates a equalizer of the same type.
+ * @return the newly created equalizer.
+ */
+ VectorValueEqualizer<V> clone();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java
new file mode 100644
index 000000000..aee090706
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+
+/**
+ * Generic visitor to visit a {@link org.apache.arrow.vector.ValueVector}.
+ * @param <OUT> the output result type.
+ * @param <IN> the input data together with visitor.
+ */
+public interface VectorVisitor<OUT, IN> {
+
+ OUT visit(BaseFixedWidthVector left, IN value);
+
+ OUT visit(BaseVariableWidthVector left, IN value);
+
+ OUT visit(BaseLargeVariableWidthVector left, IN value);
+
+ OUT visit(ListVector left, IN value);
+
+ OUT visit(FixedSizeListVector left, IN value);
+
+ OUT visit(LargeListVector left, IN value);
+
+ OUT visit(NonNullableStructVector left, IN value);
+
+ OUT visit(UnionVector left, IN value);
+
+ OUT visit(DenseUnionVector left, IN value);
+
+ OUT visit(NullVector left, IN value);
+
+ OUT visit(ExtensionTypeVector<?> left, IN value);
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java
new file mode 100644
index 000000000..a7b6a8ca4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare.util;
+
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.FloatingPointVector;
+import org.apache.arrow.vector.compare.VectorValueEqualizer;
+
+/**
+ * Vector value equalizers that regard values as equal if their difference
+ * is within a small threshold (epsilon).
+ */
+public class ValueEpsilonEqualizers {
+
+ private ValueEpsilonEqualizers() {
+
+ }
+
+ /**
+ * Difference function for floating point values.
+ */
+ public static class FloatingPointEpsilonEqualizer implements VectorValueEqualizer<FloatingPointVector> {
+ private final double epsilon;
+
+ public FloatingPointEpsilonEqualizer(double epsilon) {
+ this.epsilon = epsilon;
+ }
+
+ @Override
+ public final boolean valuesEqual(
+ FloatingPointVector vector1, int index1, FloatingPointVector vector2, int index2) {
+ boolean isNull1 = vector1.isNull(index1);
+ boolean isNull2 = vector2.isNull(index2);
+
+ if (isNull1 || isNull2) {
+ return isNull1 == isNull2;
+ }
+
+ double d1 = vector1.getValueAsDouble(index1);
+ double d2 = vector2.getValueAsDouble(index2);
+
+ if (Double.isNaN(d1)) {
+ return Double.isNaN(d2);
+ }
+ if (Double.isInfinite(d1)) {
+ return Double.isInfinite(d2) && Math.signum(d1) == Math.signum(d2);
+ }
+
+ return Math.abs(d1 - d2) <= epsilon;
+ }
+
+ @Override
+ public VectorValueEqualizer<FloatingPointVector> clone() {
+ return new FloatingPointEpsilonEqualizer(epsilon);
+ }
+ }
+
+ /**
+ * Difference function for float values.
+ */
+ public static class Float4EpsilonEqualizer implements VectorValueEqualizer<Float4Vector> {
+ private final float epsilon;
+
+ public Float4EpsilonEqualizer(float epsilon) {
+ this.epsilon = epsilon;
+ }
+
+ @Override
+ public final boolean valuesEqual(Float4Vector vector1, int index1, Float4Vector vector2, int index2) {
+ boolean isNull1 = vector1.isNull(index1);
+ boolean isNull2 = vector2.isNull(index2);
+
+ if (isNull1 || isNull2) {
+ return isNull1 == isNull2;
+ }
+
+ float f1 = vector1.get(index1);
+ float f2 = vector2.get(index2);
+
+ if (Float.isNaN(f1)) {
+ return Float.isNaN(f2);
+ }
+ if (Float.isInfinite(f1)) {
+ return Float.isInfinite(f2) && Math.signum(f1) == Math.signum(f2);
+ }
+
+ return Math.abs(f1 - f2) <= epsilon;
+ }
+
+ @Override
+ public VectorValueEqualizer<Float4Vector> clone() {
+ return new Float4EpsilonEqualizer(epsilon);
+ }
+ }
+
+ /**
+ * Difference function for double values.
+ */
+ public static class Float8EpsilonEqualizer implements VectorValueEqualizer<Float8Vector> {
+ private final double epsilon;
+
+ public Float8EpsilonEqualizer(double epsilon) {
+ this.epsilon = epsilon;
+ }
+
+ @Override
+ public final boolean valuesEqual(Float8Vector vector1, int index1, Float8Vector vector2, int index2) {
+ boolean isNull1 = vector1.isNull(index1);
+ boolean isNull2 = vector2.isNull(index2);
+
+ if (isNull1 || isNull2) {
+ return isNull1 == isNull2;
+ }
+
+ double d1 = vector1.get(index1);
+ double d2 = vector2.get(index2);
+
+ if (Double.isNaN(d1)) {
+ return Double.isNaN(d2);
+ }
+ if (Double.isInfinite(d1)) {
+ return Double.isInfinite(d2) && Math.signum(d1) == Math.signum(d2);
+ }
+
+ return Math.abs(d1 - d2) <= epsilon;
+ }
+
+ @Override
+ public VectorValueEqualizer<Float8Vector> clone() {
+ return new Float8EpsilonEqualizer(epsilon);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java
new file mode 100644
index 000000000..898bfe3d3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.List;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+
+/**
+ * Base class for composite vectors.
+ *
+ * <p>This class implements common functionality of composite vectors.
+ */
+public abstract class AbstractContainerVector implements ValueVector, DensityAwareVector {
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class);
+
+ protected final String name;
+ protected final BufferAllocator allocator;
+ protected final CallBack callBack;
+
+ protected AbstractContainerVector(String name, BufferAllocator allocator, CallBack callBack) {
+ this.name = name;
+ this.allocator = allocator;
+ this.callBack = callBack;
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException();
+ }
+ }
+
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ /**
+ * Returns a {@link org.apache.arrow.vector.ValueVector} corresponding to the given field name if exists or null.
+ *
+ * @param name the name of the child to return
+ * @return the corresponding FieldVector
+ */
+ public FieldVector getChild(String name) {
+ return getChild(name, FieldVector.class);
+ }
+
+ /**
+ * Clears out all underlying child vectors.
+ */
+ @Override
+ public void close() {
+ for (ValueVector vector : (Iterable<ValueVector>) this) {
+ vector.close();
+ }
+ }
+
+ protected <T extends ValueVector> T typeify(ValueVector v, Class<T> clazz) {
+ if (clazz.isAssignableFrom(v.getClass())) {
+ return clazz.cast(v);
+ }
+ throw new IllegalStateException(String.format("Vector requested [%s] was different than type stored [%s]. Arrow " +
+ "doesn't yet support heterogeneous types.", clazz.getSimpleName(), v.getClass().getSimpleName()));
+ }
+
+ protected boolean supportsDirectRead() {
+ return false;
+ }
+
+ // return the number of child vectors
+ public abstract int size();
+
+ // add a new vector with the input FieldType or return the existing vector if we already added one with the same name
+ public abstract <T extends FieldVector> T addOrGet(String name, FieldType fieldType, Class<T> clazz);
+
+ // return the child vector with the input name
+ public abstract <T extends FieldVector> T getChild(String name, Class<T> clazz);
+
+ // return the child vector's ordinal in the composite container
+ public abstract VectorWithOrdinal getChildVectorWithOrdinal(String name);
+
+ public StructVector addOrGetStruct(String name) {
+ return addOrGet(name, FieldType.nullable(new Struct()), StructVector.class);
+ }
+
+ public ListVector addOrGetList(String name) {
+ return addOrGet(name, FieldType.nullable(new List()), ListVector.class);
+ }
+
+ public UnionVector addOrGetUnion(String name) {
+ return addOrGet(name, FieldType.nullable(MinorType.UNION.getType()), UnionVector.class);
+ }
+
+ public FixedSizeListVector addOrGetFixedSizeList(String name, int listSize) {
+ return addOrGet(name, FieldType.nullable(new FixedSizeList(listSize)), FixedSizeListVector.class);
+ }
+
+ public MapVector addOrGetMap(String name, boolean keysSorted) {
+ return addOrGet(name, FieldType.nullable(new ArrowType.Map(keysSorted)), MapVector.class);
+ }
+
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java
new file mode 100644
index 000000000..be6d99233
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java
@@ -0,0 +1,425 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.PromotableMultiMapWithOrdinal;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * Base class for StructVectors. Currently used by NonNullableStructVector
+ */
+public abstract class AbstractStructVector extends AbstractContainerVector {
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class);
+ private static final String STRUCT_CONFLICT_POLICY_ENV = "ARROW_STRUCT_CONFLICT_POLICY";
+ private static final String STRUCT_CONFLICT_POLICY_JVM = "arrow.struct.conflict.policy";
+ private static final ConflictPolicy DEFAULT_CONFLICT_POLICY;
+ // Maintains a map with key as field name and value is the vector itself
+ private final PromotableMultiMapWithOrdinal<String, FieldVector> vectors;
+ protected final boolean allowConflictPolicyChanges;
+ private ConflictPolicy conflictPolicy;
+
+
+ static {
+ String conflictPolicyStr = System.getProperty(STRUCT_CONFLICT_POLICY_JVM,
+ ConflictPolicy.CONFLICT_REPLACE.toString());
+ if (conflictPolicyStr == null) {
+ conflictPolicyStr = System.getenv(STRUCT_CONFLICT_POLICY_ENV);
+ }
+ ConflictPolicy conflictPolicy;
+ try {
+ conflictPolicy = ConflictPolicy.valueOf(conflictPolicyStr.toUpperCase());
+ } catch (Exception e) {
+ conflictPolicy = ConflictPolicy.CONFLICT_REPLACE;
+ }
+ DEFAULT_CONFLICT_POLICY = conflictPolicy;
+ }
+
+ /**
+ * Policy to determine how to react when duplicate columns are encountered.
+ */
+ public enum ConflictPolicy {
+ // Ignore the conflict and append the field. This is the default behaviour
+ CONFLICT_APPEND,
+ // Keep the existing field and ignore the newer one.
+ CONFLICT_IGNORE,
+ // Replace the existing field with the newer one.
+ CONFLICT_REPLACE,
+ // Refuse the new field and error out.
+ CONFLICT_ERROR
+ }
+
+ /**
+ * Base coonstructor that sets default conflict policy to APPEND.
+ */
+ protected AbstractStructVector(String name,
+ BufferAllocator allocator,
+ CallBack callBack,
+ ConflictPolicy conflictPolicy,
+ boolean allowConflictPolicyChanges) {
+ super(name, allocator, callBack);
+ this.conflictPolicy = conflictPolicy == null ? DEFAULT_CONFLICT_POLICY : conflictPolicy;
+ this.vectors = new PromotableMultiMapWithOrdinal<>(allowConflictPolicyChanges, this.conflictPolicy);
+ this.allowConflictPolicyChanges = allowConflictPolicyChanges;
+ }
+
+ /**
+ * Set conflict policy and return last conflict policy state.
+ */
+ public ConflictPolicy setConflictPolicy(ConflictPolicy conflictPolicy) {
+ ConflictPolicy tmp = this.conflictPolicy;
+ this.conflictPolicy = conflictPolicy;
+ this.vectors.setConflictPolicy(conflictPolicy);
+ return tmp;
+ }
+
+ public ConflictPolicy getConflictPolicy() {
+ return conflictPolicy;
+ }
+
+ @Override
+ public void close() {
+ for (final ValueVector valueVector : vectors.values()) {
+ valueVector.close();
+ }
+ vectors.clear();
+
+ super.close();
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* boolean to keep track if all the memory allocation were successful
+ * Used in the case of composite vectors when we need to allocate multiple
+ * buffers for multiple vectors. If one of the allocations failed we need to
+ * clear all the memory that we allocated
+ */
+ boolean success = false;
+ try {
+ for (final ValueVector v : vectors.values()) {
+ if (!v.allocateNewSafe()) {
+ return false;
+ }
+ }
+ success = true;
+ } finally {
+ if (!success) {
+ clear();
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public void reAlloc() {
+ for (final ValueVector v : vectors.values()) {
+ v.reAlloc();
+ }
+ }
+
+ /**
+ * Adds a new field with the given parameters or replaces the existing one and consequently returns the resultant
+ * {@link org.apache.arrow.vector.ValueVector}.
+ *
+ * <p>Execution takes place in the following order:
+ * <ul>
+ * <li>
+ * if field is new, create and insert a new vector of desired type.
+ * </li>
+ * <li>
+ * if field exists and existing vector is of desired vector type, return the vector.
+ * </li>
+ * <li>
+ * if field exists and null filled, clear the existing vector; create and insert a new vector of desired type.
+ * </li>
+ * <li>
+ * otherwise, throw an {@link java.lang.IllegalStateException}
+ * </li>
+ * </ul>
+ *
+ * @param childName the name of the field
+ * @param fieldType the type for the vector
+ * @param clazz class of expected vector type
+ * @param <T> class type of expected vector type
+ * @return resultant {@link org.apache.arrow.vector.ValueVector}
+ * @throws java.lang.IllegalStateException raised if there is a hard schema change
+ */
+ public <T extends FieldVector> T addOrGet(String childName, FieldType fieldType, Class<T> clazz) {
+ final ValueVector existing = getChild(childName);
+ boolean create = false;
+ if (existing == null) {
+ create = true;
+ } else if (clazz.isAssignableFrom(existing.getClass())) {
+ return clazz.cast(existing);
+ } else if (nullFilled(existing)) {
+ existing.clear();
+ create = true;
+ }
+ if (create) {
+ final T vector = clazz.cast(fieldType.createNewSingleVector(childName, allocator, callBack));
+ putChild(childName, vector);
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return vector;
+ }
+ final String message = "Arrow does not support schema change yet. Existing[%s] and desired[%s] vector types " +
+ "mismatch";
+ throw new IllegalStateException(String.format(message, existing.getClass().getSimpleName(), clazz.getSimpleName()));
+ }
+
+ private boolean nullFilled(ValueVector vector) {
+ return BitVectorHelper.checkAllBitsEqualTo(vector.getValidityBuffer(), vector.getValueCount(), false);
+ }
+
+ /**
+ * Returns a {@link org.apache.arrow.vector.ValueVector} corresponding to the given ordinal identifier.
+ *
+ * @param id the ordinal of the child to return
+ * @return the corresponding child
+ */
+ public ValueVector getChildByOrdinal(int id) {
+ return vectors.getByOrdinal(id);
+ }
+
+ /**
+ * Returns a {@link org.apache.arrow.vector.ValueVector} instance of subtype of T corresponding to the given
+ * field name if exists or null.
+ *
+ * If there is more than one element for name this will return the first inserted.
+ *
+ * @param name the name of the child to return
+ * @param clazz the expected type of the child
+ * @return the child corresponding to this name
+ */
+ @Override
+ public <T extends FieldVector> T getChild(String name, Class<T> clazz) {
+ final FieldVector f = vectors.get(name);
+ if (f == null) {
+ return null;
+ }
+ return typeify(f, clazz);
+ }
+
+ protected ValueVector add(String childName, FieldType fieldType) {
+ FieldVector vector = fieldType.createNewSingleVector(childName, allocator, callBack);
+ putChild(childName, vector);
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return vector;
+ }
+
+ /**
+ * Inserts the vector with the given name if it does not exist else replaces it with the new value.
+ *
+ * <p>Note that this method does not enforce any vector type check nor throws a schema change exception.
+ *
+ * @param name the name of the child to add
+ * @param vector the vector to add as a child
+ */
+ protected void putChild(String name, FieldVector vector) {
+ putVector(name, vector);
+ }
+
+ private void put(String name, FieldVector vector, boolean overwrite) {
+ final boolean old = vectors.put(
+ Preconditions.checkNotNull(name, "field name cannot be null"),
+ Preconditions.checkNotNull(vector, "vector cannot be null"),
+ overwrite
+ );
+ if (old) {
+ logger.debug("Field [{}] mutated to [{}] ", name,
+ vector.getClass().getSimpleName());
+ }
+ }
+
+ /**
+ * Inserts the input vector into the map if it does not exist.
+ *
+ * <p>
+ * If the field name already exists the conflict is handled according to the currently set ConflictPolicy
+ * </p>
+ *
+ * @param name field name
+ * @param vector vector to be inserted
+ */
+ protected void putVector(String name, FieldVector vector) {
+ switch (conflictPolicy) {
+ case CONFLICT_APPEND:
+ put(name, vector, false);
+ break;
+ case CONFLICT_IGNORE:
+ if (!vectors.containsKey(name)) {
+ put(name, vector, false);
+ }
+ break;
+ case CONFLICT_REPLACE:
+ if (vectors.containsKey(name)) {
+ vectors.removeAll(name);
+ }
+ put(name, vector, true);
+ break;
+ case CONFLICT_ERROR:
+ if (vectors.containsKey(name)) {
+ throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ",
+ vector.getClass().getSimpleName(), vector.getField().getFieldType()));
+ }
+ put(name, vector, false);
+ break;
+ default:
+ throw new IllegalStateException(String.format("%s type not a valid conflict state", conflictPolicy));
+ }
+
+ }
+
+ /**
+ * Get child vectors.
+ * @return a sequence of underlying child vectors.
+ */
+ protected List<FieldVector> getChildren() {
+ int size = vectors.size();
+ List<FieldVector> children = new ArrayList<>();
+ for (int i = 0; i < size; i++) {
+ children.add(vectors.getByOrdinal(i));
+ }
+ return children;
+ }
+
+ /**
+ * Get child field names.
+ */
+ public List<String> getChildFieldNames() {
+ return getChildren().stream()
+ .map(child -> child.getField().getName())
+ .collect(Collectors.toList());
+ }
+
+ /**
+ * Get the number of child vectors.
+ * @return the number of underlying child vectors.
+ */
+ @Override
+ public int size() {
+ return vectors.size();
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.<ValueVector>unmodifiableCollection(vectors.values()).iterator();
+ }
+
+ /**
+ * Get primitive child vectors.
+ * @return a list of scalar child vectors recursing the entire vector hierarchy.
+ */
+ public List<ValueVector> getPrimitiveVectors() {
+ final List<ValueVector> primitiveVectors = new ArrayList<>();
+ for (final FieldVector v : vectors.values()) {
+ primitiveVectors.addAll(getPrimitiveVectors(v));
+ }
+ return primitiveVectors;
+ }
+
+ private List<ValueVector> getPrimitiveVectors(FieldVector v) {
+ final List<ValueVector> primitives = new ArrayList<>();
+ if (v instanceof AbstractStructVector) {
+ AbstractStructVector structVector = (AbstractStructVector) v;
+ primitives.addAll(structVector.getPrimitiveVectors());
+ } else if (v instanceof ListVector) {
+ ListVector listVector = (ListVector) v;
+ primitives.addAll(getPrimitiveVectors(listVector.getDataVector()));
+ } else if (v instanceof FixedSizeListVector) {
+ FixedSizeListVector listVector = (FixedSizeListVector) v;
+ primitives.addAll(getPrimitiveVectors(listVector.getDataVector()));
+ } else if (v instanceof UnionVector) {
+ UnionVector unionVector = (UnionVector) v;
+ for (final FieldVector vector : unionVector.getChildrenFromFields()) {
+ primitives.addAll(getPrimitiveVectors(vector));
+ }
+ } else {
+ primitives.add(v);
+ }
+ return primitives;
+ }
+
+ /**
+ * Get a child vector by name. If duplicate names this returns the first inserted.
+ * @param name the name of the child to return
+ * @return a vector with its corresponding ordinal mapping if field exists or null.
+ */
+ @Override
+ public VectorWithOrdinal getChildVectorWithOrdinal(String name) {
+ final int ordinal = vectors.getOrdinal(name);
+ if (ordinal < 0) {
+ return null;
+ }
+ final ValueVector vector = vectors.getByOrdinal(ordinal);
+ return new VectorWithOrdinal(vector, ordinal);
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final List<ArrowBuf> buffers = new ArrayList<>();
+
+ for (final ValueVector vector : vectors.values()) {
+ for (final ArrowBuf buf : vector.getBuffers(false)) {
+ buffers.add(buf);
+ if (clear) {
+ buf.getReferenceManager().retain(1);
+ }
+ }
+ if (clear) {
+ vector.clear();
+ }
+ }
+
+ return buffers.toArray(new ArrowBuf[buffers.size()]);
+ }
+
+ @Override
+ public int getBufferSize() {
+ int actualBufSize = 0;
+
+ for (final ValueVector v : vectors.values()) {
+ for (final ArrowBuf buf : v.getBuffers(false)) {
+ actualBufSize += buf.writerIndex();
+ }
+ }
+ return actualBufSize;
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0 , getValueCount());
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java
new file mode 100644
index 000000000..5f547b901
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.vector.FieldVector;
+
+/**
+ * Abstraction for all list type vectors.
+ */
+public interface BaseListVector extends FieldVector {
+
+ /**
+ * Get data vector start index with the given list index.
+ */
+ int getElementStartIndex(int index);
+
+ /**
+ * Get data vector end index with the given list index.
+ */
+ int getElementEndIndex(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
new file mode 100644
index 000000000..62d4a1299
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
+
+/** Base class for Vectors that contain repeated values. */
+public abstract class BaseRepeatedValueVector extends BaseValueVector implements RepeatedValueVector, BaseListVector {
+
+ public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE;
+ public static final String DATA_VECTOR_NAME = "$data$";
+
+ public static final byte OFFSET_WIDTH = 4;
+ protected ArrowBuf offsetBuffer;
+ protected FieldVector vector;
+ protected final CallBack callBack;
+ protected int valueCount;
+ protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH;
+ private final String name;
+
+ protected String defaultDataVectorName = DATA_VECTOR_NAME;
+
+ protected BaseRepeatedValueVector(String name, BufferAllocator allocator, CallBack callBack) {
+ this(name, allocator, DEFAULT_DATA_VECTOR, callBack);
+ }
+
+ protected BaseRepeatedValueVector(String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) {
+ super(allocator);
+ this.name = name;
+ this.offsetBuffer = allocator.getEmpty();
+ this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null");
+ this.callBack = callBack;
+ this.valueCount = 0;
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ boolean dataAlloc = false;
+ try {
+ allocateOffsetBuffer(offsetAllocationSizeInBytes);
+ dataAlloc = vector.allocateNewSafe();
+ } catch (Exception e) {
+ e.printStackTrace();
+ clear();
+ return false;
+ } finally {
+ if (!dataAlloc) {
+ clear();
+ }
+ }
+ return dataAlloc;
+ }
+
+ protected void allocateOffsetBuffer(final long size) {
+ final int curSize = (int) size;
+ offsetBuffer = allocator.buffer(curSize);
+ offsetBuffer.readerIndex(0);
+ offsetAllocationSizeInBytes = curSize;
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+ @Override
+ public void reAlloc() {
+ reallocOffsetBuffer();
+ vector.reAlloc();
+ }
+
+ protected void reallocOffsetBuffer() {
+ final long currentBufferCapacity = offsetBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (offsetAllocationSizeInBytes > 0) {
+ newAllocationSize = offsetAllocationSizeInBytes;
+ } else {
+ newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ newAllocationSize = Math.min(newAllocationSize, (long) (OFFSET_WIDTH) * Integer.MAX_VALUE);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+ newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ offsetBuffer.getReferenceManager().release(1);
+ offsetBuffer = newBuf;
+ offsetAllocationSizeInBytes = newAllocationSize;
+ }
+
+ /**
+ * Get the offset vector.
+ * @deprecated This API will be removed, as the current implementations no longer hold inner offset vectors.
+ *
+ * @return the underlying offset vector or null if none exists.
+ */
+ @Override
+ @Deprecated
+ public UInt4Vector getOffsetVector() {
+ throw new UnsupportedOperationException("There is no inner offset vector");
+ }
+
+ @Override
+ public FieldVector getDataVector() {
+ return vector;
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH;
+ if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) {
+ vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD);
+ } else {
+ vector.setInitialCapacity(numRecords);
+ }
+ }
+
+ /**
+ * Specialized version of setInitialCapacity() for ListVector. This is
+ * used by some callers when they want to explicitly control and be
+ * conservative about memory allocated for inner data vector. This is
+ * very useful when we are working with memory constraints for a query
+ * and have a fixed amount of memory reserved for the record batch. In
+ * such cases, we are likely to face OOM or related problems when
+ * we reserve memory for a record batch with value count x and
+ * do setInitialCapacity(x) such that each vector allocates only
+ * what is necessary and not the default amount but the multiplier
+ * forces the memory requirement to go beyond what was needed.
+ *
+ * @param numRecords value count
+ * @param density density of ListVector. Density is the average size of
+ * list per position in the List vector. For example, a
+ * density value of 10 implies each position in the list
+ * vector has a list of 10 values.
+ * A density value of 0.1 implies out of 10 positions in
+ * the list vector, 1 position has a list of size 1 and
+ * remaining positions are null (no lists) or empty lists.
+ * This helps in tightly controlling the memory we provision
+ * for inner data vector.
+ */
+ @Override
+ public void setInitialCapacity(int numRecords, double density) {
+ if ((numRecords * density) >= Integer.MAX_VALUE) {
+ throw new OversizedAllocationException("Requested amount of memory is more than max allowed");
+ }
+
+ offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH;
+
+ int innerValueCapacity = Math.max((int) (numRecords * density), 1);
+
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density);
+ } else {
+ vector.setInitialCapacity(innerValueCapacity);
+ }
+ }
+
+ @Override
+ public int getValueCapacity() {
+ final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ if (vector == DEFAULT_DATA_VECTOR) {
+ return offsetValueCapacity;
+ }
+ return Math.min(vector.getValueCapacity(), offsetValueCapacity);
+ }
+
+ protected int getOffsetBufferValueCapacity() {
+ return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH);
+ }
+
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return ((valueCount + 1) * OFFSET_WIDTH) + vector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ int innerVectorValueCount = offsetBuffer.getInt(valueCount * OFFSET_WIDTH);
+
+ return ((valueCount + 1) * OFFSET_WIDTH) + vector.getBufferSizeFor(innerVectorValueCount);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.<ValueVector>singleton(getDataVector()).iterator();
+ }
+
+ @Override
+ public void clear() {
+ offsetBuffer = releaseBuffer(offsetBuffer);
+ vector.clear();
+ valueCount = 0;
+ super.clear();
+ }
+
+ @Override
+ public void reset() {
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ vector.reset();
+ valueCount = 0;
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(offsetBuffer);
+ list.addAll(Arrays.asList(vector.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Get value indicating if inner vector is set.
+ * @return 1 if inner vector is explicitly set via #addOrGetVector else 0
+ */
+ public int size() {
+ return vector == DEFAULT_DATA_VECTOR ? 0 : 1;
+ }
+
+ /**
+ * Initialize the data vector (and execute callback) if it hasn't already been done,
+ * returns the data vector.
+ */
+ public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
+ boolean created = false;
+ if (vector instanceof NullVector) {
+ vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, callBack);
+ // returned vector must have the same field
+ created = true;
+ if (callBack != null &&
+ // not a schema change if changing from ZeroVector to ZeroVector
+ (fieldType.getType().getTypeID() != ArrowTypeID.Null)) {
+ callBack.doWork();
+ }
+ }
+
+ if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) {
+ final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]",
+ fieldType.getType().getTypeID(), vector.getField().getType().getTypeID());
+ throw new SchemaChangeRuntimeException(msg);
+ }
+
+ return new AddOrGetResult<>((T) vector, created);
+ }
+
+ protected void replaceDataVector(FieldVector v) {
+ vector.clear();
+ vector = v;
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /* returns the value count for inner data vector for this list vector */
+ public int getInnerValueCount() {
+ return vector.getValueCount();
+ }
+
+
+ /** Returns the value count for inner data vector at a particular index. */
+ public int getInnerValueCountAt(int index) {
+ return offsetBuffer.getInt((index + 1) * OFFSET_WIDTH) -
+ offsetBuffer.getInt(index * OFFSET_WIDTH);
+ }
+
+ /** Return if value at index is null (this implementation is always false). */
+ public boolean isNull(int index) {
+ return false;
+ }
+
+ /** Return if value at index is empty (this implementation is always false). */
+ public boolean isEmpty(int index) {
+ return false;
+ }
+
+ /** Starts a new repeated value. */
+ public int startNewValue(int index) {
+ while (index >= getOffsetBufferValueCapacity()) {
+ reallocOffsetBuffer();
+ }
+ int offset = offsetBuffer.getInt(index * OFFSET_WIDTH);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, offset);
+ setValueCount(index + 1);
+ return offset;
+ }
+
+ /** Preallocates the number of repeated values. */
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ while (valueCount > getOffsetBufferValueCapacity()) {
+ reallocOffsetBuffer();
+ }
+ final int childValueCount = valueCount == 0 ? 0 :
+ offsetBuffer.getInt(valueCount * OFFSET_WIDTH);
+ vector.setValueCount(childValueCount);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java
new file mode 100644
index 000000000..b32dce367
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.UInt4Vector;
+
+/**
+ * A helper class that is used to track and populate empty values in repeated value vectors.
+ */
+public class EmptyValuePopulator {
+ private final UInt4Vector offsets;
+
+ public EmptyValuePopulator(UInt4Vector offsets) {
+ this.offsets = Preconditions.checkNotNull(offsets, "offsets cannot be null");
+ }
+
+ /**
+ * Marks all values since the last set as empty. The last set value is obtained from underlying offsets vector.
+ *
+ * @param lastIndex the last index (inclusive) in the offsets vector until which empty population takes place
+ * @throws java.lang.IndexOutOfBoundsException if lastIndex is negative or greater than offsets capacity.
+ */
+ public void populate(int lastIndex) {
+ if (lastIndex < 0) {
+ throw new IndexOutOfBoundsException("index cannot be negative");
+ }
+ final int lastSet = Math.max(offsets.getValueCount() - 1, 0);
+ final int previousEnd = offsets.get(lastSet); //0 ? 0 : accessor.get(lastSet);
+ for (int i = lastSet; i < lastIndex; i++) {
+ offsets.setSafe(i + 1, previousEnd);
+ }
+ offsets.setValueCount(lastIndex + 1);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
new file mode 100644
index 000000000..8d23f55fb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
@@ -0,0 +1,675 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static java.util.Collections.singletonList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/** A ListVector where every list value is of the same size. */
+public class FixedSizeListVector extends BaseValueVector implements BaseListVector, PromotableVector {
+
+ public static FixedSizeListVector empty(String name, int size, BufferAllocator allocator) {
+ FieldType fieldType = FieldType.nullable(new ArrowType.FixedSizeList(size));
+ return new FixedSizeListVector(name, allocator, fieldType, null);
+ }
+
+ private FieldVector vector;
+ private ArrowBuf validityBuffer;
+ private final int listSize;
+ private final FieldType fieldType;
+ private final String name;
+
+ private UnionFixedSizeListReader reader;
+ private int valueCount;
+ private int validityAllocationSizeInBytes;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param name The name for the vector.
+ * @param allocator The allocator to use for creating/reallocating buffers for the vector.
+ * @param fieldType The underlying data type of the vector.
+ * @param unusedSchemaChangeCallback Currently unused.
+ */
+ public FixedSizeListVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack unusedSchemaChangeCallback) {
+ super(allocator);
+
+ this.name = name;
+ this.validityBuffer = allocator.getEmpty();
+ this.vector = ZeroVector.INSTANCE;
+ this.fieldType = fieldType;
+ this.listSize = ((ArrowType.FixedSizeList) fieldType.getType()).getListSize();
+ Preconditions.checkArgument(listSize >= 0, "list size must be non-negative");
+ this.valueCount = 0;
+ this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
+ }
+
+ @Override
+ public Field getField() {
+ List<Field> children = Collections.singletonList(getDataVector().getField());
+ return new Field(name, fieldType, children);
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FIXED_SIZE_LIST;
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ /** Get the fixed size for each list. */
+ public int getListSize() {
+ return listSize;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (children.size() != 1) {
+ throw new IllegalArgumentException("Lists have only one child. Found: " + children);
+ }
+ Field field = children.get(0);
+ AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(field.getFieldType());
+ if (!addOrGetVector.isCreated()) {
+ throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector());
+ }
+ addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren());
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return singletonList(vector);
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 1) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 1 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ valueCount = fieldNode.getLength();
+
+ validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(1);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+
+ return result;
+ }
+
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ @Override
+ public UnionFixedSizeListReader getReader() {
+ if (reader == null) {
+ reader = new UnionFixedSizeListReader(this);
+ }
+ return reader;
+ }
+
+ private void invalidateReader() {
+ reader = null;
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException("Failure while allocating memory");
+ }
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* boolean to keep track if all the memory allocation were successful
+ * Used in the case of composite vectors when we need to allocate multiple
+ * buffers for multiple vectors. If one of the allocations failed we need to
+ * clear all the memory that we allocated
+ */
+ boolean success = false;
+ try {
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+ /* allocate validity buffer */
+ allocateValidityBuffer(validityAllocationSizeInBytes);
+ success = vector.allocateNewSafe();
+ } finally {
+ if (!success) {
+ clear();
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ validityAllocationSizeInBytes = curSize;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ @Override
+ public void reAlloc() {
+ reallocValidityBuffer();
+ vector.reAlloc();
+ }
+
+ private void reallocValidityBuffer() {
+ final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (validityAllocationSizeInBytes > 0) {
+ newAllocationSize = validityAllocationSizeInBytes;
+ } else {
+ newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ validityBuffer.getReferenceManager().release(1);
+ validityBuffer = newBuf;
+ validityAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ public FieldVector getDataVector() {
+ return vector;
+ }
+
+ /**
+ * Start a new value in the list vector.
+ *
+ * @param index index of the value to start
+ */
+ public int startNewValue(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ reallocValidityBuffer();
+ }
+
+ BitVectorHelper.setBit(validityBuffer, index);
+ return index * listSize;
+ }
+
+ public UnionFixedSizeListWriter getWriter() {
+ return new UnionFixedSizeListWriter(this);
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ vector.setInitialCapacity(numRecords * listSize);
+ }
+
+ @Override
+ public int getValueCapacity() {
+ if (vector == ZeroVector.INSTANCE || listSize == 0) {
+ return 0;
+ }
+ return Math.min(vector.getValueCapacity() / listSize, getValidityBufferValueCapacity());
+ }
+
+ @Override
+ public int getBufferSize() {
+ if (getValueCount() == 0) {
+ return 0;
+ }
+ return getValidityBufferSizeFromCount(valueCount) + vector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return getValidityBufferSizeFromCount(valueCount) +
+ vector.getBufferSizeFor(valueCount * listSize);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.<ValueVector>singleton(vector).iterator();
+ }
+
+ @Override
+ public void clear() {
+ validityBuffer = releaseBuffer(validityBuffer);
+ vector.clear();
+ valueCount = 0;
+ super.clear();
+ }
+
+ @Override
+ public void reset() {
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ vector.reset();
+ valueCount = 0;
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ setReaderAndWriterIndex();
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(validityBuffer);
+ list.addAll(Arrays.asList(vector.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Get value indicating if inner vector is set.
+ * @return 1 if inner vector is explicitly set via #addOrGetVector else 0
+ */
+ public int size() {
+ return vector == ZeroVector.INSTANCE ? 0 : 1;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType type) {
+ boolean created = false;
+ if (vector == ZeroVector.INSTANCE) {
+ vector = type.createNewSingleVector(DATA_VECTOR_NAME, allocator, null);
+ invalidateReader();
+ created = true;
+ }
+ // returned vector must have the same field
+ if (!Objects.equals(vector.getField().getType(), type.getType())) {
+ final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]",
+ type.getType(), vector.getField().getType());
+ throw new SchemaChangeRuntimeException(msg);
+ }
+
+ return new AddOrGetResult<>((T) vector, created);
+ }
+
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ TransferPair pair = from.makeTransferPair(this);
+ pair.copyValueSafe(fromIndex, thisIndex);
+ }
+
+ @Override
+ public UnionVector promoteToUnion() {
+ UnionVector vector = new UnionVector(name, allocator, /* field type */ null, /* call-back */ null);
+ this.vector.clear();
+ this.vector = vector;
+ invalidateReader();
+ return vector;
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return validityBuffer.memoryAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public List<?> getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final List<Object> vals = new JsonStringArrayList<>(listSize);
+ for (int i = 0; i < listSize; i++) {
+ vals.add(vector.getObject(index * listSize + i));
+ }
+ return vals;
+ }
+
+ /**
+ * Returns whether the value at index null.
+ */
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Returns non-zero when the value at index is non-null.
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ @Override
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * Returns the number of elements the validity buffer can represent with its
+ * current capacity.
+ */
+ private int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * Sets the value at index to null. Reallocates if index is larger than capacity.
+ */
+ public void setNull(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ reallocValidityBuffer();
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /** Sets the value at index to not-null. Reallocates if index is larger than capacity. */
+ public void setNotNull(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ reallocValidityBuffer();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ while (valueCount > getValidityBufferValueCapacity()) {
+ reallocValidityBuffer();
+ }
+ vector.setValueCount(valueCount * listSize);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((FixedSizeListVector) target);
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isSet(index) == 0) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ int hash = 0;
+ for (int i = 0; i < listSize; i++) {
+ hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(index * listSize + i, hasher));
+ }
+ return hash;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public int getElementStartIndex(int index) {
+ return listSize * index;
+ }
+
+ @Override
+ public int getElementEndIndex(int index) {
+ return listSize * (index + 1);
+ }
+
+ private class TransferImpl implements TransferPair {
+
+ FixedSizeListVector to;
+ TransferPair dataPair;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ this(new FixedSizeListVector(name, allocator, fieldType, callBack));
+ }
+
+ public TransferImpl(FixedSizeListVector to) {
+ this.to = to;
+ to.addOrGetVector(vector.getField().getFieldType());
+ dataPair = vector.makeTransferPair(to.vector);
+ }
+
+ @Override
+ public void transfer() {
+ to.clear();
+ dataPair.transfer();
+ to.validityBuffer = BaseValueVector.transferBuffer(validityBuffer, to.allocator);
+ to.setValueCount(valueCount);
+ clear();
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ final int startPoint = listSize * startIndex;
+ final int sliceLength = listSize * length;
+ to.clear();
+
+ /* splitAndTransfer validity buffer */
+ splitAndTransferValidityBuffer(startIndex, length, to);
+ /* splitAndTransfer data buffer */
+ dataPair.splitAndTransfer(startPoint, sliceLength);
+ to.setValueCount(length);
+ }
+
+ /*
+ * transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length, FixedSizeListVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ while (toIndex >= to.getValueCapacity()) {
+ to.reAlloc();
+ }
+ BitVectorHelper.setValidityBit(to.validityBuffer, toIndex, isSet(fromIndex));
+ int fromOffset = fromIndex * listSize;
+ int toOffset = toIndex * listSize;
+ for (int i = 0; i < listSize; i++) {
+ dataPair.copyValueSafe(fromOffset + i, toOffset + i);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
new file mode 100644
index 000000000..6fbdda277
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
@@ -0,0 +1,1036 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static java.util.Collections.singletonList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.complex.impl.UnionLargeListReader;
+import org.apache.arrow.vector.complex.impl.UnionLargeListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A list vector contains lists of a specific type of elements. Its structure contains 3 elements.
+ * <ol>
+ * <li>A validity buffer.</li>
+ * <li> An offset buffer, that denotes lists boundaries. </li>
+ * <li> A child data vector that contains the elements of lists. </li>
+ * </ol>
+ *
+ * This is the LargeList variant of list, it has a 64-bit wide offset
+ *
+ * <p>
+ * WARNING: Currently Arrow in Java doesn't support 64-bit vectors. This class
+ * follows the expected behaviour of a LargeList but doesn't actually support allocating
+ * a 64-bit vector. It has little use until 64-bit vectors are supported and should be used
+ * with caution.
+ * todo review checkedCastToInt usage in this class.
+ * Once int64 indexed vectors are supported these checks aren't needed.
+ * </p>
+ */
+public class LargeListVector extends BaseValueVector implements RepeatedValueVector, FieldVector, PromotableVector {
+
+ public static LargeListVector empty(String name, BufferAllocator allocator) {
+ return new LargeListVector(name, allocator, FieldType.nullable(ArrowType.LargeList.INSTANCE), null);
+ }
+
+ public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE;
+ public static final String DATA_VECTOR_NAME = "$data$";
+
+ public static final byte OFFSET_WIDTH = 8;
+ protected ArrowBuf offsetBuffer;
+ protected FieldVector vector;
+ protected final CallBack callBack;
+ protected int valueCount;
+ protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH;
+ private final String name;
+
+ protected String defaultDataVectorName = DATA_VECTOR_NAME;
+ protected ArrowBuf validityBuffer;
+ protected UnionLargeListReader reader;
+ private final FieldType fieldType;
+ private int validityAllocationSizeInBytes;
+
+ /**
+ * The maximum index that is actually set.
+ */
+ private int lastSet;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use for allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ */
+ public LargeListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(allocator);
+ this.name = name;
+ this.validityBuffer = allocator.getEmpty();
+ this.fieldType = checkNotNull(fieldType);
+ this.callBack = callBack;
+ this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
+ this.lastSet = -1;
+ this.offsetBuffer = allocator.getEmpty();
+ this.vector = vector == null ? DEFAULT_DATA_VECTOR : vector;
+ this.valueCount = 0;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (children.size() != 1) {
+ throw new IllegalArgumentException("Lists have only one child. Found: " + children);
+ }
+ Field field = children.get(0);
+ AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(field.getFieldType());
+ if (!addOrGetVector.isCreated()) {
+ throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector());
+ }
+
+ addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren());
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ offsetAllocationSizeInBytes = (long) (numRecords + 1) * OFFSET_WIDTH;
+ if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) {
+ vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD);
+ } else {
+ vector.setInitialCapacity(numRecords);
+ }
+ }
+
+ /**
+ * Specialized version of setInitialCapacity() for ListVector. This is
+ * used by some callers when they want to explicitly control and be
+ * conservative about memory allocated for inner data vector. This is
+ * very useful when we are working with memory constraints for a query
+ * and have a fixed amount of memory reserved for the record batch. In
+ * such cases, we are likely to face OOM or related problems when
+ * we reserve memory for a record batch with value count x and
+ * do setInitialCapacity(x) such that each vector allocates only
+ * what is necessary and not the default amount but the multiplier
+ * forces the memory requirement to go beyond what was needed.
+ *
+ * @param numRecords value count
+ * @param density density of ListVector. Density is the average size of
+ * list per position in the List vector. For example, a
+ * density value of 10 implies each position in the list
+ * vector has a list of 10 values.
+ * A density value of 0.1 implies out of 10 positions in
+ * the list vector, 1 position has a list of size 1 and
+ * remaining positions are null (no lists) or empty lists.
+ * This helps in tightly controlling the memory we provision
+ * for inner data vector.
+ */
+ @Override
+ public void setInitialCapacity(int numRecords, double density) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ if ((numRecords * density) >= Integer.MAX_VALUE) {
+ throw new OversizedAllocationException("Requested amount of memory is more than max allowed");
+ }
+
+ offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH;
+
+ int innerValueCapacity = Math.max((int) (numRecords * density), 1);
+
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density);
+ } else {
+ vector.setInitialCapacity(innerValueCapacity);
+ }
+ }
+
+ /**
+ * Get the density of this ListVector.
+ * @return density
+ */
+ public double getDensity() {
+ if (valueCount == 0) {
+ return 0.0D;
+ }
+ final long startOffset = offsetBuffer.getLong(0L);
+ final long endOffset = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH);
+ final double totalListSize = endOffset - startOffset;
+ return totalListSize / valueCount;
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return singletonList(getDataVector());
+ }
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 2) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf offBuffer = ownBuffers.get(1);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
+
+ validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
+ offsetAllocationSizeInBytes = offsetBuffer.capacity();
+
+ lastSet = fieldNode.getLength() - 1;
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ * @return the inner buffers.
+ */
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(2);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(offsetBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ offsetBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ offsetBuffer.writerIndex(0);
+ } else {
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
+ }
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException("Failure while allocating memory");
+ }
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ public boolean allocateNewSafe() {
+ boolean success = false;
+ try {
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+ /* allocate validity buffer */
+ allocateValidityBuffer(validityAllocationSizeInBytes);
+ /* allocate offset and data buffer */
+ boolean dataAlloc = false;
+ try {
+ allocateOffsetBuffer(offsetAllocationSizeInBytes);
+ dataAlloc = vector.allocateNewSafe();
+ } catch (Exception e) {
+ e.printStackTrace();
+ clear();
+ return false;
+ } finally {
+ if (!dataAlloc) {
+ clear();
+ }
+ }
+ success = dataAlloc;
+ } finally {
+ if (!success) {
+ clear();
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ validityAllocationSizeInBytes = curSize;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ protected void allocateOffsetBuffer(final long size) {
+ offsetBuffer = allocator.buffer(size);
+ offsetBuffer.readerIndex(0);
+ offsetAllocationSizeInBytes = size;
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ @Override
+ public void reAlloc() {
+ /* reallocate the validity buffer */
+ reallocValidityBuffer();
+ /* reallocate the offset and data */
+ reallocOffsetBuffer();
+ vector.reAlloc();
+ }
+
+ private void reallocValidityAndOffsetBuffers() {
+ reallocOffsetBuffer();
+ reallocValidityBuffer();
+ }
+
+ protected void reallocOffsetBuffer() {
+ final long currentBufferCapacity = offsetBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (offsetAllocationSizeInBytes > 0) {
+ newAllocationSize = offsetAllocationSizeInBytes;
+ } else {
+ newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ newAllocationSize = Math.min(newAllocationSize, (long) (OFFSET_WIDTH) * Integer.MAX_VALUE);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+ newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ offsetBuffer.getReferenceManager().release(1);
+ offsetBuffer = newBuf;
+ offsetAllocationSizeInBytes = newAllocationSize;
+ }
+
+ private void reallocValidityBuffer() {
+ final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (validityAllocationSizeInBytes > 0) {
+ newAllocationSize = validityAllocationSizeInBytes;
+ } else {
+ newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ validityBuffer.getReferenceManager().release(1);
+ validityBuffer = newBuf;
+ validityAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ * @param inIndex position to copy from in source vector
+ * @param outIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ * @param inIndex position to copy from in source vector
+ * @param outIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int inIndex, int outIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ FieldReader in = from.getReader();
+ in.setPosition(inIndex);
+ UnionLargeListWriter out = getWriter();
+ out.setPosition(outIndex);
+ ComplexCopier.copy(in, out);
+ }
+
+ /**
+ * Get the offset vector.
+ * @deprecated This API will be removed, as the current implementations no longer hold inner offset vectors.
+ *
+ * @return the underlying offset vector or null if none exists.
+ */
+ @Override
+ @Deprecated
+ public UInt4Vector getOffsetVector() {
+ throw new UnsupportedOperationException("There is no inner offset vector");
+ }
+
+ /**
+ * Get the inner data vector for this list vector.
+ * @return data vector
+ */
+ @Override
+ public FieldVector getDataVector() {
+ return vector;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((LargeListVector) target);
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return (validityBuffer.memoryAddress());
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ return (offsetBuffer.memoryAddress());
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return offsetBuffer;
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isSet(index) == 0) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ int hash = 0;
+ final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ for (long i = start; i < end; i++) {
+ hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(checkedCastToInt(i), hasher));
+ }
+ return hash;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ public UnionLargeListWriter getWriter() {
+ return new UnionLargeListWriter(this);
+ }
+
+ protected void replaceDataVector(FieldVector v) {
+ vector.clear();
+ vector = v;
+ }
+
+ @Override
+ public UnionVector promoteToUnion() {
+ UnionVector vector = new UnionVector("$data$", allocator, /* field type */ null, callBack);
+ replaceDataVector(vector);
+ invalidateReader();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return vector;
+ }
+
+ private class TransferImpl implements TransferPair {
+
+ LargeListVector to;
+ TransferPair dataTransferPair;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ this(new LargeListVector(name, allocator, fieldType, callBack));
+ }
+
+ public TransferImpl(LargeListVector to) {
+ this.to = to;
+ to.addOrGetVector(vector.getField().getFieldType());
+ if (to.getDataVector() instanceof ZeroVector) {
+ to.addOrGetVector(vector.getField().getFieldType());
+ }
+ dataTransferPair = getDataVector().makeTransferPair(to.getDataVector());
+ }
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ */
+ @Override
+ public void transfer() {
+ to.clear();
+ dataTransferPair.transfer();
+ to.validityBuffer = transferBuffer(validityBuffer, to.allocator);
+ to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator);
+ to.lastSet = lastSet;
+ if (valueCount > 0) {
+ to.setValueCount(valueCount);
+ }
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ */
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ final long startPoint = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH);
+ final long sliceLength = offsetBuffer.getLong((long) (startIndex + length) * OFFSET_WIDTH) - startPoint;
+ to.clear();
+ to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
+ /* splitAndTransfer offset buffer */
+ for (int i = 0; i < length + 1; i++) {
+ final long relativeOffset = offsetBuffer.getLong((long) (startIndex + i) * OFFSET_WIDTH) - startPoint;
+ to.offsetBuffer.setLong((long) i * OFFSET_WIDTH, relativeOffset);
+ }
+ /* splitAndTransfer validity buffer */
+ splitAndTransferValidityBuffer(startIndex, length, to);
+ /* splitAndTransfer data buffer */
+ dataTransferPair.splitAndTransfer(checkedCastToInt(startPoint), checkedCastToInt(sliceLength));
+ to.lastSet = length - 1;
+ to.setValueCount(length);
+ }
+
+ /*
+ * transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length, LargeListVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ this.to.copyFrom(from, to, LargeListVector.this);
+ }
+ }
+
+ @Override
+ public UnionLargeListReader getReader() {
+ if (reader == null) {
+ reader = new UnionLargeListReader(this);
+ }
+ return reader;
+ }
+
+ /**
+ * Initialize the data vector (and execute callback) if it hasn't already been done,
+ * returns the data vector.
+ */
+ public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
+ boolean created = false;
+ if (vector instanceof NullVector) {
+ vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, callBack);
+ // returned vector must have the same field
+ created = true;
+ if (callBack != null &&
+ // not a schema change if changing from ZeroVector to ZeroVector
+ (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) {
+ callBack.doWork();
+ }
+ }
+
+ if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) {
+ final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]",
+ fieldType.getType().getTypeID(), vector.getField().getType().getTypeID());
+ throw new SchemaChangeRuntimeException(msg);
+ }
+
+ invalidateReader();
+ return new AddOrGetResult<>((T) vector, created);
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH;
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ return offsetBufferSize + validityBufferSize + vector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ long innerVectorValueCount = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH);
+
+ return ((valueCount + 1) * OFFSET_WIDTH) +
+ vector.getBufferSizeFor(checkedCastToInt(innerVectorValueCount)) +
+ validityBufferSize;
+ }
+
+ @Override
+ public Field getField() {
+ return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField()));
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LARGELIST;
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public void clear() {
+ offsetBuffer = releaseBuffer(offsetBuffer);
+ vector.clear();
+ valueCount = 0;
+ super.clear();
+ validityBuffer = releaseBuffer(validityBuffer);
+ lastSet = -1;
+ }
+
+ @Override
+ public void reset() {
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ vector.reset();
+ valueCount = 0;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ lastSet = -1;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ setReaderAndWriterIndex();
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(offsetBuffer);
+ list.add(validityBuffer);
+ list.addAll(Arrays.asList(vector.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ protected void invalidateReader() {
+ reader = null;
+ }
+
+ /**
+ * Get the element in the list vector at a particular index.
+ * @param index position of the element
+ * @return Object at given position
+ */
+ @Override
+ public List<?> getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final List<Object> vals = new JsonStringArrayList<>();
+ final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ final ValueVector vv = getDataVector();
+ for (long i = start; i < end; i++) {
+ vals.add(vv.getObject(checkedCastToInt(i)));
+ }
+
+ return vals;
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null, false otherwise
+ */
+ @Override
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Check if element at given index is empty list.
+ * @param index position of element
+ * @return true if element at given index is empty list or NULL, false otherwise
+ */
+ public boolean isEmpty(int index) {
+ if (isNull(index)) {
+ return true;
+ } else {
+ final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ return start == end;
+ }
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ @Override
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Get the current value capacity for the vector.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ return getValidityAndOffsetValueCapacity();
+ }
+
+ protected int getOffsetBufferValueCapacity() {
+ return checkedCastToInt(offsetBuffer.capacity() / OFFSET_WIDTH);
+ }
+
+ private int getValidityAndOffsetValueCapacity() {
+ final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ return Math.min(offsetValueCapacity, getValidityBufferValueCapacity());
+ }
+
+ private int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * Sets the list at index to be not-null. Reallocates validity buffer if index
+ * is larger than current capacity.
+ */
+ public void setNotNull(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ lastSet = index;
+ }
+
+ /**
+ * Sets list at index to be null.
+ * @param index position in vector
+ */
+ public void setNull(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ if (lastSet >= index) {
+ lastSet = index - 1;
+ }
+ for (int i = lastSet + 1; i <= index; i++) {
+ final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
+ offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /**
+ * Start a new value in the list vector.
+ *
+ * @param index index of the value to start
+ */
+ public long startNewValue(long index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ for (int i = lastSet + 1; i <= index; i++) {
+ final long currentOffset = offsetBuffer.getLong((long) i * OFFSET_WIDTH);
+ offsetBuffer.setLong(((long) i + 1L) * OFFSET_WIDTH, currentOffset);
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ lastSet = checkedCastToInt(index);
+ return offsetBuffer.getLong(((long) lastSet + 1L) * OFFSET_WIDTH);
+ }
+
+ /**
+ * End the current value.
+ *
+ * @param index index of the value to end
+ * @param size number of elements in the list that was written
+ */
+ public void endValue(int index, long size) {
+ final long currentOffset = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ offsetBuffer.setLong(((long) index + 1L) * OFFSET_WIDTH, currentOffset + size);
+ }
+
+ /**
+ * Sets the value count for the vector.
+ *
+ * <p>
+ * Important note: The underlying vector does not support 64-bit
+ * allocations yet. This may throw if attempting to hold larger
+ * than what a 32-bit vector can store.
+ * </p>
+ *
+ * @param valueCount value count
+ */
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ if (valueCount > 0) {
+ while (valueCount > getValidityAndOffsetValueCapacity()) {
+ /* check if validity and offset buffers need to be re-allocated */
+ reallocValidityAndOffsetBuffers();
+ }
+ for (int i = lastSet + 1; i < valueCount; i++) {
+ /* fill the holes with offsets */
+ final long currentOffset = offsetBuffer.getLong((long) i * OFFSET_WIDTH);
+ offsetBuffer.setLong(((long) i + 1L) * OFFSET_WIDTH, currentOffset);
+ }
+ }
+ /* valueCount for the data vector is the current end offset */
+ final long childValueCount = (valueCount == 0) ? 0 :
+ offsetBuffer.getLong(((long) lastSet + 1L) * OFFSET_WIDTH);
+ /* set the value count of data vector and this will take care of
+ * checking whether data buffer needs to be reallocated.
+ * TODO: revisit when 64-bit vectors are supported
+ */
+ Preconditions.checkArgument(childValueCount <= Integer.MAX_VALUE || childValueCount >= Integer.MIN_VALUE,
+ "LargeListVector doesn't yet support 64-bit allocations: %s", childValueCount);
+ vector.setValueCount((int) childValueCount);
+ }
+
+ public void setLastSet(int value) {
+ lastSet = value;
+ }
+
+ public int getLastSet() {
+ return lastSet;
+ }
+
+ public long getElementStartIndex(int index) {
+ return offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ }
+
+ public long getElementEndIndex(int index) {
+ return offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
new file mode 100644
index 000000000..b5b32951a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -0,0 +1,879 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static java.util.Collections.singletonList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.complex.impl.UnionListReader;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A list vector contains lists of a specific type of elements. Its structure contains 3 elements.
+ * <ol>
+ * <li>A validity buffer.</li>
+ * <li> An offset buffer, that denotes lists boundaries. </li>
+ * <li> A child data vector that contains the elements of lists. </li>
+ * </ol>
+ * The latter two are managed by its superclass.
+ */
+public class ListVector extends BaseRepeatedValueVector implements PromotableVector {
+
+ public static ListVector empty(String name, BufferAllocator allocator) {
+ return new ListVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null);
+ }
+
+ protected ArrowBuf validityBuffer;
+ protected UnionListReader reader;
+ private CallBack callBack;
+ private final FieldType fieldType;
+ private int validityAllocationSizeInBytes;
+
+ /**
+ * The maximum index that is actually set.
+ */
+ private int lastSet;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use for allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ */
+ public ListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(name, allocator, callBack);
+ this.validityBuffer = allocator.getEmpty();
+ this.fieldType = checkNotNull(fieldType);
+ this.callBack = callBack;
+ this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
+ this.lastSet = -1;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (children.size() != 1) {
+ throw new IllegalArgumentException("Lists have only one child. Found: " + children);
+ }
+ Field field = children.get(0);
+ AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(field.getFieldType());
+ if (!addOrGetVector.isCreated()) {
+ throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector());
+ }
+
+ addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren());
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ super.setInitialCapacity(numRecords);
+ }
+
+ /**
+ * Specialized version of setInitialCapacity() for ListVector. This is
+ * used by some callers when they want to explicitly control and be
+ * conservative about memory allocated for inner data vector. This is
+ * very useful when we are working with memory constraints for a query
+ * and have a fixed amount of memory reserved for the record batch. In
+ * such cases, we are likely to face OOM or related problems when
+ * we reserve memory for a record batch with value count x and
+ * do setInitialCapacity(x) such that each vector allocates only
+ * what is necessary and not the default amount but the multiplier
+ * forces the memory requirement to go beyond what was needed.
+ *
+ * @param numRecords value count
+ * @param density density of ListVector. Density is the average size of
+ * list per position in the List vector. For example, a
+ * density value of 10 implies each position in the list
+ * vector has a list of 10 values.
+ * A density value of 0.1 implies out of 10 positions in
+ * the list vector, 1 position has a list of size 1 and
+ * remaining positions are null (no lists) or empty lists.
+ * This helps in tightly controlling the memory we provision
+ * for inner data vector.
+ */
+ @Override
+ public void setInitialCapacity(int numRecords, double density) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ super.setInitialCapacity(numRecords, density);
+ }
+
+ /**
+ * Get the density of this ListVector.
+ * @return density
+ */
+ public double getDensity() {
+ if (valueCount == 0) {
+ return 0.0D;
+ }
+ final int startOffset = offsetBuffer.getInt(0);
+ final int endOffset = offsetBuffer.getInt(valueCount * OFFSET_WIDTH);
+ final double totalListSize = endOffset - startOffset;
+ return totalListSize / valueCount;
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return singletonList(getDataVector());
+ }
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 2) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf offBuffer = ownBuffers.get(1);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
+
+ validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
+ offsetAllocationSizeInBytes = offsetBuffer.capacity();
+
+ lastSet = fieldNode.getLength() - 1;
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ * @return the inner buffers.
+ */
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(2);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(offsetBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ offsetBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ offsetBuffer.writerIndex(0);
+ } else {
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
+ }
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ public UnionListWriter getWriter() {
+ return new UnionListWriter(this);
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException("Failure while allocating memory");
+ }
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ public boolean allocateNewSafe() {
+ boolean success = false;
+ try {
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+ /* allocate validity buffer */
+ allocateValidityBuffer(validityAllocationSizeInBytes);
+ /* allocate offset and data buffer */
+ success = super.allocateNewSafe();
+ } finally {
+ if (!success) {
+ clear();
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ validityAllocationSizeInBytes = curSize;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ @Override
+ public void reAlloc() {
+ /* reallocate the validity buffer */
+ reallocValidityBuffer();
+ /* reallocate the offset and data */
+ super.reAlloc();
+ }
+
+ private void reallocValidityAndOffsetBuffers() {
+ reallocOffsetBuffer();
+ reallocValidityBuffer();
+ }
+
+ private void reallocValidityBuffer() {
+ final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (validityAllocationSizeInBytes > 0) {
+ newAllocationSize = validityAllocationSizeInBytes;
+ } else {
+ newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ validityBuffer.getReferenceManager().release(1);
+ validityBuffer = newBuf;
+ validityAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ * @param inIndex position to copy from in source vector
+ * @param outIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ * @param inIndex position to copy from in source vector
+ * @param outIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int inIndex, int outIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ FieldReader in = from.getReader();
+ in.setPosition(inIndex);
+ FieldWriter out = getWriter();
+ out.setPosition(outIndex);
+ ComplexCopier.copy(in, out);
+ }
+
+ /**
+ * Get the inner data vector for this list vector.
+ * @return data vector
+ */
+ @Override
+ public FieldVector getDataVector() {
+ return vector;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((ListVector) target);
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return (validityBuffer.memoryAddress());
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ return (offsetBuffer.memoryAddress());
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return offsetBuffer;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isSet(index) == 0) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ int hash = 0;
+ final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
+ final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ for (int i = start; i < end; i++) {
+ hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher));
+ }
+ return hash;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ private class TransferImpl implements TransferPair {
+
+ ListVector to;
+ TransferPair dataTransferPair;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ this(new ListVector(name, allocator, fieldType, callBack));
+ }
+
+ public TransferImpl(ListVector to) {
+ this.to = to;
+ to.addOrGetVector(vector.getField().getFieldType());
+ if (to.getDataVector() instanceof ZeroVector) {
+ to.addOrGetVector(vector.getField().getFieldType());
+ }
+ dataTransferPair = getDataVector().makeTransferPair(to.getDataVector());
+ }
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ */
+ @Override
+ public void transfer() {
+ to.clear();
+ dataTransferPair.transfer();
+ to.validityBuffer = transferBuffer(validityBuffer, to.allocator);
+ to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator);
+ to.lastSet = lastSet;
+ if (valueCount > 0) {
+ to.setValueCount(valueCount);
+ }
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ */
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
+ final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
+ to.clear();
+ to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
+ /* splitAndTransfer offset buffer */
+ for (int i = 0; i < length + 1; i++) {
+ final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
+ to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
+ }
+ /* splitAndTransfer validity buffer */
+ splitAndTransferValidityBuffer(startIndex, length, to);
+ /* splitAndTransfer data buffer */
+ dataTransferPair.splitAndTransfer(startPoint, sliceLength);
+ to.lastSet = length - 1;
+ to.setValueCount(length);
+ }
+
+ /*
+ * transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length, ListVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ this.to.copyFrom(from, to, ListVector.this);
+ }
+ }
+
+ @Override
+ public UnionListReader getReader() {
+ if (reader == null) {
+ reader = new UnionListReader(this);
+ }
+ return reader;
+ }
+
+ /** Initialize the child data vector to field type. */
+ public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
+ AddOrGetResult<T> result = super.addOrGetVector(fieldType);
+ invalidateReader();
+ return result;
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH;
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ return offsetBufferSize + validityBufferSize + vector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+
+ return super.getBufferSizeFor(valueCount) + validityBufferSize;
+ }
+
+ @Override
+ public Field getField() {
+ return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField()));
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LIST;
+ }
+
+ @Override
+ public void clear() {
+ super.clear();
+ validityBuffer = releaseBuffer(validityBuffer);
+ lastSet = -1;
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ lastSet = -1;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ setReaderAndWriterIndex();
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(offsetBuffer);
+ list.add(validityBuffer);
+ list.addAll(Arrays.asList(vector.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ @Override
+ public UnionVector promoteToUnion() {
+ UnionVector vector = new UnionVector("$data$", allocator, /* field type*/ null, callBack);
+ replaceDataVector(vector);
+ invalidateReader();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return vector;
+ }
+
+ protected void invalidateReader() {
+ reader = null;
+ }
+
+ /**
+ * Get the element in the list vector at a particular index.
+ * @param index position of the element
+ * @return Object at given position
+ */
+ @Override
+ public List<?> getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final List<Object> vals = new JsonStringArrayList<>();
+ final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
+ final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ final ValueVector vv = getDataVector();
+ for (int i = start; i < end; i++) {
+ vals.add(vv.getObject(i));
+ }
+
+ return vals;
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null, false otherwise
+ */
+ @Override
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Check if element at given index is empty list.
+ * @param index position of element
+ * @return true if element at given index is empty list or NULL, false otherwise
+ */
+ @Override
+ public boolean isEmpty(int index) {
+ if (isNull(index)) {
+ return true;
+ } else {
+ final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
+ final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ return start == end;
+ }
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ @Override
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Get the current value capacity for the vector.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ return getValidityAndOffsetValueCapacity();
+ }
+
+ private int getValidityAndOffsetValueCapacity() {
+ final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ return Math.min(offsetValueCapacity, getValidityBufferValueCapacity());
+ }
+
+ private int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * Sets the list at index to be not-null. Reallocates validity buffer if index
+ * is larger than current capacity.
+ */
+ public void setNotNull(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ lastSet = index;
+ }
+
+ /**
+ * Sets list at index to be null.
+ * @param index position in vector
+ */
+ public void setNull(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ if (lastSet >= index) {
+ lastSet = index - 1;
+ }
+ for (int i = lastSet + 1; i <= index; i++) {
+ final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
+ offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /**
+ * Start a new value in the list vector.
+ *
+ * @param index index of the value to start
+ */
+ @Override
+ public int startNewValue(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ if (lastSet >= index) {
+ lastSet = index - 1;
+ }
+ for (int i = lastSet + 1; i <= index; i++) {
+ final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
+ offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ lastSet = index;
+ return offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH);
+ }
+
+ /**
+ * End the current value.
+ *
+ * @param index index of the value to end
+ * @param size number of elements in the list that was written
+ */
+ public void endValue(int index, int size) {
+ final int currentOffset = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, currentOffset + size);
+ }
+
+ /**
+ * Sets the value count for the vector.
+ *
+ * @param valueCount value count
+ */
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ if (valueCount > 0) {
+ while (valueCount > getValidityAndOffsetValueCapacity()) {
+ /* check if validity and offset buffers need to be re-allocated */
+ reallocValidityAndOffsetBuffers();
+ }
+ for (int i = lastSet + 1; i < valueCount; i++) {
+ /* fill the holes with offsets */
+ final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
+ offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
+ }
+ }
+ /* valueCount for the data vector is the current end offset */
+ final int childValueCount = (valueCount == 0) ? 0 :
+ offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH);
+ /* set the value count of data vector and this will take care of
+ * checking whether data buffer needs to be reallocated.
+ */
+ vector.setValueCount(childValueCount);
+ }
+
+ public void setLastSet(int value) {
+ lastSet = value;
+ }
+
+ public int getLastSet() {
+ return lastSet;
+ }
+
+ @Override
+ public int getElementStartIndex(int index) {
+ return offsetBuffer.getInt(index * OFFSET_WIDTH);
+ }
+
+ @Override
+ public int getElementEndIndex(int index) {
+ return offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
new file mode 100644
index 000000000..d4275e6fe
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static org.apache.arrow.util.Preconditions.checkArgument;
+
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionMapWriter;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Map;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+
+/**
+ * A MapVector is used to store entries of key/value pairs. It is a container vector that is
+ * composed of a list of struct values with "key" and "value" fields. The MapVector is nullable,
+ * but if a map is set at a given index, there must be an entry. In other words, the StructVector
+ * data is non-nullable. Also for a given entry, the "key" is non-nullable, however the "value" can
+ * be null.
+ */
+public class MapVector extends ListVector {
+
+ public static final String KEY_NAME = "key";
+ public static final String VALUE_NAME = "value";
+ public static final String DATA_VECTOR_NAME = "entries";
+
+ /**
+ * Construct an empty MapVector with no data. Child vectors must be added subsequently.
+ *
+ * @param name The name of the vector.
+ * @param allocator The allocator used for allocating/reallocating buffers.
+ * @param keysSorted True if the map keys have been pre-sorted.
+ * @return a new instance of MapVector.
+ */
+ public static MapVector empty(String name, BufferAllocator allocator, boolean keysSorted) {
+ return new MapVector(name, allocator, FieldType.nullable(new Map(keysSorted)), null);
+ }
+
+ /**
+ * Construct a MapVector instance.
+ *
+ * @param name The name of the vector.
+ * @param allocator The allocator used for allocating/reallocating buffers.
+ * @param fieldType The type definition of the MapVector.
+ * @param callBack A schema change callback.
+ */
+ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(name, allocator, fieldType, callBack);
+ defaultDataVectorName = DATA_VECTOR_NAME;
+ }
+
+ /**
+ * Initialize child vectors of the map from the given list of fields.
+ *
+ * @param children List of fields that will be children of this MapVector.
+ */
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ checkArgument(children.size() == 1, "Maps have one List child. Found: %s", children);
+
+ Field structField = children.get(0);
+ MinorType minorType = Types.getMinorTypeForArrowType(structField.getType());
+ checkArgument(minorType == MinorType.STRUCT && !structField.isNullable(),
+ "Map data should be a non-nullable struct type");
+ checkArgument(structField.getChildren().size() == 2,
+ "Map data should be a struct with 2 children. Found: %s", children);
+
+ Field keyField = structField.getChildren().get(0);
+ checkArgument(!keyField.isNullable(), "Map data key type should be a non-nullable");
+
+ AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(structField.getFieldType());
+ checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector());
+
+ addOrGetVector.getVector().initializeChildrenFromFields(structField.getChildren());
+ }
+
+ /**
+ * Get the writer for this MapVector instance.
+ */
+ @Override
+ public UnionMapWriter getWriter() {
+ return new UnionMapWriter(this);
+ }
+
+ /**
+ * Get the reader for this MapVector instance.
+ */
+ @Override
+ public UnionMapReader getReader() {
+ if (reader == null) {
+ reader = new UnionMapReader(this);
+ }
+ return (UnionMapReader) reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.MAP;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java
new file mode 100644
index 000000000..4da266812
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.ComplexHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringHashMap;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A struct vector that has no null values (and no validity buffer).
+ * Child Vectors are handled in {@link AbstractStructVector}.
+ */
+public class NonNullableStructVector extends AbstractStructVector {
+
+ public static NonNullableStructVector empty(String name, BufferAllocator allocator) {
+ FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ return new NonNullableStructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false);
+ }
+
+ public static NonNullableStructVector emptyWithDuplicates(String name, BufferAllocator allocator) {
+ FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ return new NonNullableStructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true);
+ }
+
+ private final SingleStructReaderImpl reader = new SingleStructReaderImpl(this);
+ protected final FieldType fieldType;
+ public int valueCount;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use to allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ */
+ public NonNullableStructVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack callBack) {
+ super(name,
+ allocator,
+ callBack,
+ null,
+ true);
+ this.fieldType = checkNotNull(fieldType);
+ this.valueCount = 0;
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use to allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ * @param conflictPolicy How to handle duplicate field names in the struct.
+ */
+ public NonNullableStructVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack callBack,
+ ConflictPolicy conflictPolicy,
+ boolean allowConflictPolicyChanges) {
+ super(name, allocator, callBack, conflictPolicy, allowConflictPolicyChanges);
+ this.fieldType = checkNotNull(fieldType);
+ this.valueCount = 0;
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ private transient StructTransferPair ephPair;
+
+ /**
+ * Copies the element at fromIndex in the provided vector to thisIndex. Reallocates buffers
+ * if thisIndex is larger then current capacity.
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (ephPair == null || ephPair.from != from) {
+ ephPair = (StructTransferPair) from.makeTransferPair(this);
+ }
+ ephPair.copyValueSafe(fromIndex, thisIndex);
+ }
+
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ copyFrom(fromIndex, thisIndex, from);
+ }
+
+ @Override
+ protected boolean supportsDirectRead() {
+ return true;
+ }
+
+ public Iterator<String> fieldNameIterator() {
+ return getChildFieldNames().iterator();
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ for (final ValueVector v : this) {
+ v.setInitialCapacity(numRecords);
+ }
+ }
+
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ for (final ValueVector vector : this) {
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(valueCount, density);
+ } else {
+ vector.setInitialCapacity(valueCount);
+ }
+ }
+ }
+
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0 || size() == 0) {
+ return 0;
+ }
+ long buffer = 0;
+ for (final ValueVector v : this) {
+ buffer += v.getBufferSize();
+ }
+
+ return (int) buffer;
+ }
+
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ long bufferSize = 0;
+ for (final ValueVector v : this) {
+ bufferSize += v.getBufferSizeFor(valueCount);
+ }
+
+ return (int) bufferSize;
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(name, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new StructTransferPair(this, new NonNullableStructVector(name,
+ allocator,
+ fieldType,
+ callBack,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new StructTransferPair(this, (NonNullableStructVector) to);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new StructTransferPair(this, new NonNullableStructVector(ref,
+ allocator,
+ fieldType,
+ callBack,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ /**
+ * {@link TransferPair} for this this class.
+ */
+ protected static class StructTransferPair implements TransferPair {
+ private final TransferPair[] pairs;
+ private final NonNullableStructVector from;
+ private final NonNullableStructVector to;
+
+ public StructTransferPair(NonNullableStructVector from, NonNullableStructVector to) {
+ this(from, to, true);
+ }
+
+ protected StructTransferPair(NonNullableStructVector from, NonNullableStructVector to, boolean allocate) {
+ this.from = from;
+ this.to = to;
+ this.pairs = new TransferPair[from.size()];
+ this.to.ephPair = null;
+
+ int i = 0;
+ FieldVector vector;
+ for (String child : from.getChildFieldNames()) {
+ int preSize = to.size();
+ vector = from.getChild(child);
+ if (vector == null) {
+ continue;
+ }
+ //DRILL-1872: we add the child fields for the vector, looking up the field by name. For a map vector,
+ // the child fields may be nested fields of the top level child. For example if the structure
+ // of a child field is oa.oab.oabc then we add oa, then add oab to oa then oabc to oab.
+ // But the children member of a Materialized field is a HashSet. If the fields are added in the
+ // children HashSet, and the hashCode of the Materialized field includes the hash code of the
+ // children, the hashCode value of oa changes *after* the field has been added to the HashSet.
+ // (This is similar to what happens in ScanBatch where the children cannot be added till they are
+ // read). To take care of this, we ensure that the hashCode of the MaterializedField does not
+ // include the hashCode of the children but is based only on MaterializedField$key.
+ final FieldVector newVector = to.addOrGet(child, vector.getField().getFieldType(), vector.getClass());
+ if (allocate && to.size() != preSize) {
+ newVector.allocateNew();
+ }
+ pairs[i++] = vector.makeTransferPair(newVector);
+ }
+ }
+
+ @Override
+ public void transfer() {
+ for (final TransferPair p : pairs) {
+ p.transfer();
+ }
+ to.valueCount = from.valueCount;
+ from.clear();
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ for (TransferPair p : pairs) {
+ p.copyValueSafe(from, to);
+ }
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ for (TransferPair p : pairs) {
+ p.splitAndTransfer(startIndex, length);
+ }
+ to.setValueCount(length);
+ }
+ }
+
+ @Override
+ public int getValueCapacity() {
+ if (size() == 0) {
+ return 0;
+ }
+
+ return getChildren().stream()
+ .mapToInt(child -> child.getValueCapacity())
+ .min()
+ .getAsInt();
+ }
+
+ @Override
+ public Map<String, ?> getObject(int index) {
+ Map<String, Object> vv = new JsonStringHashMap<>();
+ for (String child : getChildFieldNames()) {
+ ValueVector v = getChild(child);
+ if (v != null && index < v.getValueCount()) {
+ Object value = v.getObject(index);
+ if (value != null) {
+ vv.put(child, value);
+ }
+ }
+ }
+ return vv;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ int hash = 0;
+ for (FieldVector v : getChildren()) {
+ if (index < v.getValueCount()) {
+ hash = ByteFunctionHelpers.combineHash(hash, v.hashCode(index, hasher));
+ }
+ }
+ return hash;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public boolean isNull(int index) {
+ return false;
+ }
+
+ @Override
+ public int getNullCount() {
+ return 0;
+ }
+
+ public void get(int index, ComplexHolder holder) {
+ reader.setPosition(index);
+ holder.reader = reader;
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ public ValueVector getVectorById(int id) {
+ return getChildByOrdinal(id);
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ for (final ValueVector v : getChildren()) {
+ v.setValueCount(valueCount);
+ }
+ NonNullableStructVector.this.valueCount = valueCount;
+ }
+
+ @Override
+ public void clear() {
+ for (final ValueVector v : getChildren()) {
+ v.clear();
+ }
+ valueCount = 0;
+ }
+
+ @Override
+ public void reset() {
+ for (final ValueVector v : getChildren()) {
+ v.reset();
+ }
+ valueCount = 0;
+ }
+
+ @Override
+ public Field getField() {
+ List<Field> children = new ArrayList<>();
+ for (ValueVector child : getChildren()) {
+ children.add(child.getField());
+ }
+ return new Field(name, fieldType, children);
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.STRUCT;
+ }
+
+ @Override
+ public void close() {
+ final Collection<FieldVector> vectors = getChildren();
+ for (final FieldVector v : vectors) {
+ v.close();
+ }
+ vectors.clear();
+
+ valueCount = 0;
+
+ super.close();
+ }
+
+ /** Initializes the struct's members from the given Fields. */
+ public void initializeChildrenFromFields(List<Field> children) {
+ for (Field field : children) {
+ FieldVector vector = (FieldVector) this.add(field.getName(), field.getFieldType());
+ vector.initializeChildrenFromFields(field.getChildren());
+ }
+ }
+
+ public List<FieldVector> getChildrenFromFields() {
+ return getChildren();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java
new file mode 100644
index 000000000..dda495408
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+/**
+ * Get and set position in a particular data structure.
+ *
+ */
+@SuppressWarnings("unused") // Used in when instantiating freemarker templates.
+public interface Positionable {
+ int getPosition();
+
+ void setPosition(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java
new file mode 100644
index 000000000..d4dd94acb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Vector that can store multiple {@linkplain FieldType} vectors as children.
+ */
+public interface PromotableVector {
+
+ <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType type);
+
+ UnionVector promoteToUnion();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java
new file mode 100644
index 000000000..e754f6913
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+/**
+ * A {@link org.apache.arrow.vector.ValueVector} mix-in that can be used in conjunction with
+ * {@link RepeatedValueVector} subtypes.
+ */
+public interface RepeatedFixedWidthVectorLike {
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ *
+ * @param valueCount Number of separate repeating groupings.
+ * @param innerValueCount Number of supported values in the vector.
+ */
+ void allocateNew(int valueCount, int innerValueCount);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java
new file mode 100644
index 000000000..1cae881dd
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * An abstraction representing repeated value vectors.
+ *
+ * <p>A repeated vector contains values that may either be flat or nested. A value consists of zero or more
+ * cells(inner values). Current design maintains data and offsets vectors. Each cell is stored in the data vector.
+ * Repeated vector uses the offset vector to determine the sequence of cells pertaining to an individual value.
+ */
+public interface RepeatedValueVector extends ValueVector, DensityAwareVector {
+
+ int DEFAULT_REPEAT_PER_RECORD = 5;
+
+ /**
+ * Get the offset vector.
+ * @deprecated This API will be removed, as the current implementations no longer hold inner offset vectors.
+ *
+ * @return the underlying offset vector or null if none exists.
+ */
+ @Deprecated
+ UInt4Vector getOffsetVector();
+
+ /**
+ * Get the data vector.
+ * @return the underlying data vector or null if none exists.
+ */
+ ValueVector getDataVector();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java
new file mode 100644
index 000000000..5f5324138
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+/**
+ * A {@link org.apache.arrow.vector.ValueVector} mix-in that can be used in conjunction with
+ * variable {@link RepeatedValueVector} subtypes (e.g. Strings, Lists, etc).
+ */
+public interface RepeatedVariableWidthVectorLike {
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ *
+ * @param totalBytes Desired size of the underlying data buffer.
+ * @param parentValueCount Number of separate repeating groupings.
+ * @param childValueCount Number of supported values in the vector.
+ */
+ void allocateNew(int totalBytes, int parentValueCount, int childValueCount);
+
+ /**
+ * Provide the maximum amount of variable width bytes that can be stored int his vector.
+ *
+ * @return the byte capacity
+ */
+ int getByteCapacity();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java
new file mode 100644
index 000000000..0098f6836
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import java.util.Arrays;
+
+/**
+ * Utility methods for state machines based on enums.
+ */
+public class StateTool {
+ private StateTool() {}
+
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StateTool.class);
+
+ /**
+ * Verifies <code>currentState</code> is in one of <code>expectedStates</code>,
+ * throws an IllegalArgumentException if it isn't.
+ */
+ public static <T extends Enum<?>> void check(T currentState, T... expectedStates) {
+ for (T s : expectedStates) {
+ if (s == currentState) {
+ return;
+ }
+ }
+ throw new IllegalArgumentException(String.format("Expected to be in one of these states %s but was actually in " +
+ "state %s", Arrays.toString(expectedStates), currentState));
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
new file mode 100644
index 000000000..2dabc6e01
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
@@ -0,0 +1,608 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.impl.NullableStructReaderImpl;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.holders.ComplexHolder;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A Struct vector consists of nullability/validity buffer and children vectors
+ * that make up the struct's fields. The children vectors are handled by the
+ * parent class.
+ */
+public class StructVector extends NonNullableStructVector implements FieldVector {
+
+ public static StructVector empty(String name, BufferAllocator allocator) {
+ FieldType fieldType = FieldType.nullable(Struct.INSTANCE);
+ return new StructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false);
+ }
+
+ public static StructVector emptyWithDuplicates(String name, BufferAllocator allocator) {
+ FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ return new StructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true);
+ }
+
+ private final NullableStructReaderImpl reader = new NullableStructReaderImpl(this);
+ private final NullableStructWriter writer = new NullableStructWriter(this);
+
+ protected ArrowBuf validityBuffer;
+ private int validityAllocationSizeInBytes;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use to allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ */
+ public StructVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack callBack) {
+ super(name,
+ checkNotNull(allocator),
+ fieldType,
+ callBack);
+ this.validityBuffer = allocator.getEmpty();
+ this.validityAllocationSizeInBytes =
+ BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use to allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ * @param conflictPolicy policy to determine how duplicate names are handled.
+ * @param allowConflictPolicyChanges wether duplicate names are allowed at all.
+ */
+ public StructVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack callBack,
+ ConflictPolicy conflictPolicy,
+ boolean allowConflictPolicyChanges) {
+ super(name, checkNotNull(allocator), fieldType, callBack, conflictPolicy, allowConflictPolicyChanges);
+ this.validityBuffer = allocator.getEmpty();
+ this.validityAllocationSizeInBytes =
+ BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 1) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 1 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ valueCount = fieldNode.getLength();
+ validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(1);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+
+ return result;
+ }
+
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSize(valueCount));
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ @Override
+ public NullableStructReaderImpl getReader() {
+ return reader;
+ }
+
+ public NullableStructWriter getWriter() {
+ return writer;
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return new NullableStructTransferPair(this, new StructVector(name,
+ allocator,
+ fieldType,
+ null,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new NullableStructTransferPair(this, (StructVector) to, false);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new NullableStructTransferPair(this, new StructVector(ref,
+ allocator,
+ fieldType,
+ null,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new NullableStructTransferPair(this, new StructVector(ref,
+ allocator,
+ fieldType,
+ callBack,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ /**
+ * {@link TransferPair} for this (nullable) {@link StructVector}.
+ */
+ protected class NullableStructTransferPair extends StructTransferPair {
+
+ private StructVector target;
+
+ protected NullableStructTransferPair(StructVector from, StructVector to, boolean allocate) {
+ super(from, to, allocate);
+ this.target = to;
+ }
+
+ @Override
+ public void transfer() {
+ target.clear();
+ target.validityBuffer = BaseValueVector.transferBuffer(validityBuffer, target.allocator);
+ super.transfer();
+ clear();
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ while (toIndex >= target.getValidityBufferValueCapacity()) {
+ target.reallocValidityBuffer();
+ }
+ BitVectorHelper.setValidityBit(target.validityBuffer, toIndex, isSet(fromIndex));
+ super.copyValueSafe(fromIndex, toIndex);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ target.clear();
+ splitAndTransferValidityBuffer(startIndex, length, target);
+ super.splitAndTransfer(startIndex, length);
+ }
+ }
+
+ /*
+ * transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length, StructVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = BitVectorHelper.getValidityBufferSize(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+ /**
+ * Get the value capacity of the internal validity buffer.
+ * @return number of elements that validity buffer can hold
+ */
+ private int getValidityBufferValueCapacity() {
+ return checkedCastToInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * Get the current value capacity for the vector.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ return Math.min(getValidityBufferValueCapacity(),
+ super.getValueCapacity());
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ setReaderAndWriterIndex();
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(validityBuffer);
+ list.addAll(Arrays.asList(super.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+
+ return buffers;
+ }
+
+ /**
+ * Close the vector and release the associated buffers.
+ */
+ @Override
+ public void close() {
+ clearValidityBuffer();
+ super.close();
+ }
+
+ /**
+ * Same as {@link #close()}.
+ */
+ @Override
+ public void clear() {
+ clearValidityBuffer();
+ super.clear();
+ }
+
+ /**
+ * Reset this vector to empty, does not release buffers.
+ */
+ @Override
+ public void reset() {
+ super.reset();
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /**
+ * Release the validity buffer.
+ */
+ private void clearValidityBuffer() {
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = allocator.getEmpty();
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this vector.
+ *
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return super.getBufferSize() +
+ BitVectorHelper.getValidityBufferSize(valueCount);
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ *
+ * @param valueCount desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return super.getBufferSizeFor(valueCount) +
+ BitVectorHelper.getValidityBufferSize(valueCount);
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords);
+ super.setInitialCapacity(numRecords);
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords, double density) {
+ validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords);
+ super.setInitialCapacity(numRecords, density);
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* Boolean to keep track if all the memory allocations were successful
+ * Used in the case of composite vectors when we need to allocate multiple
+ * buffers for multiple vectors. If one of the allocations failed we need to
+ * clear all the memory that we allocated
+ */
+ boolean success = false;
+ try {
+ clear();
+ allocateValidityBuffer(validityAllocationSizeInBytes);
+ success = super.allocateNewSafe();
+ } finally {
+ if (!success) {
+ clear();
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ validityAllocationSizeInBytes = curSize;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ @Override
+ public void reAlloc() {
+ /* reallocate the validity buffer */
+ reallocValidityBuffer();
+ super.reAlloc();
+ }
+
+ private void reallocValidityBuffer() {
+ final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (validityAllocationSizeInBytes > 0) {
+ newAllocationSize = validityAllocationSizeInBytes;
+ } else {
+ newAllocationSize = BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION) * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ validityBuffer.getReferenceManager().release(1);
+ validityBuffer = newBuf;
+ validityAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return validityBuffer.memoryAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Map<String, ?> getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return super.getObject(index);
+ }
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isSet(index) == 0) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ } else {
+ return super.hashCode(index, hasher);
+ }
+ }
+
+ @Override
+ public void get(int index, ComplexHolder holder) {
+ holder.isSet = isSet(index);
+ if (holder.isSet == 0) {
+ holder.reader = null;
+ return;
+ }
+ super.get(index, holder);
+ }
+
+ /**
+ * Return the number of null values in the vector.
+ */
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Returns true if the value at the provided index is null.
+ */
+ public boolean isNull(int index) {
+ return isSet(index) == 0;
+ }
+
+ /**
+ * Returns true the value at the given index is set (i.e. not null).
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Marks the value at index as being set. Reallocates the validity buffer
+ * if index is larger than current capacity.
+ */
+ public void setIndexDefined(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ /* realloc the inner buffers if needed */
+ reallocValidityBuffer();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ /**
+ * Marks the value at index as null/not set.
+ */
+ public void setNull(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ /* realloc the inner buffers if needed */
+ reallocValidityBuffer();
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ Preconditions.checkArgument(valueCount >= 0);
+ while (valueCount > getValidityBufferValueCapacity()) {
+ /* realloc the inner buffers if needed */
+ reallocValidityBuffer();
+ }
+ super.setValueCount(valueCount);
+ this.valueCount = valueCount;
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java
new file mode 100644
index 000000000..fa00f4b63
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Tuple of a {@link ValueVector} and an index into a data structure containing the {@link ValueVector}.
+ * Useful for composite types to determine the index of a child.
+ */
+public class VectorWithOrdinal {
+ public final ValueVector vector;
+ public final int ordinal;
+
+ public VectorWithOrdinal(ValueVector v, int ordinal) {
+ this.vector = v;
+ this.ordinal = ordinal;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java
new file mode 100644
index 000000000..c80fcb89d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import java.util.Iterator;
+
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.DenseUnionHolder;
+import org.apache.arrow.vector.holders.UnionHolder;
+
+/**
+ * Base class providing common functionality for {@link FieldReader} implementations.
+ *
+ * <p>This includes tracking the current index and throwing implementations of optional methods.
+ */
+abstract class AbstractBaseReader implements FieldReader {
+
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractBaseReader.class);
+
+ private int index;
+
+ public AbstractBaseReader() {
+ super();
+ }
+
+ @Override
+ public int getPosition() {
+ return index;
+ }
+
+ public void setPosition(int index) {
+ this.index = index;
+ }
+
+ protected int idx() {
+ return index;
+ }
+
+ @Override
+ public void reset() {
+ index = 0;
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ throw new IllegalStateException("The current reader doesn't support reading as a map.");
+ }
+
+ @Override
+ public boolean next() {
+ throw new IllegalStateException("The current reader doesn't support getting next information.");
+ }
+
+ @Override
+ public int size() {
+ throw new IllegalStateException("The current reader doesn't support getting size information.");
+ }
+
+ @Override
+ public void read(UnionHolder holder) {
+ holder.reader = this;
+ holder.isSet = this.isSet() ? 1 : 0;
+ }
+
+ @Override
+ public void read(int index, UnionHolder holder) {
+ throw new IllegalStateException("The current reader doesn't support reading union type");
+ }
+
+ @Override
+ public void copyAsValue(UnionWriter writer) {
+ throw new IllegalStateException("The current reader doesn't support reading union type");
+ }
+
+ @Override
+ public void read(DenseUnionHolder holder) {
+ holder.reader = this;
+ holder.isSet = this.isSet() ? 1 : 0;
+ }
+
+ @Override
+ public void read(int index, DenseUnionHolder holder) {
+ throw new IllegalStateException("The current reader doesn't support reading dense union type");
+ }
+
+ @Override
+ public void copyAsValue(DenseUnionWriter writer) {
+ throw new IllegalStateException("The current reader doesn't support reading dense union type");
+ }
+
+ @Override
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+
+ @Override
+ public void copyAsValue(MapWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java
new file mode 100644
index 000000000..cc3c5deed
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+
+
+/**
+ * Base class providing common functionality for {@link FieldWriter} implementations.
+ *
+ * <p>Currently this only includes index tracking.
+ */
+abstract class AbstractBaseWriter implements FieldWriter {
+ //private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractBaseWriter.class);
+
+ private int index;
+
+ @Override
+ public String toString() {
+ return super.toString() + "[index = " + index + "]";
+ }
+
+ int idx() {
+ return index;
+ }
+
+ @Override
+ public int getPosition() {
+ return index;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ this.index = index;
+ }
+
+ @Override
+ public void end() {
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java
new file mode 100644
index 000000000..13b26bb67
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StateTool;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Concrete implementation of {@link ComplexWriter}.
+ */
+public class ComplexWriterImpl extends AbstractFieldWriter implements ComplexWriter {
+
+ private NullableStructWriter structRoot;
+ private UnionListWriter listRoot;
+ private final NonNullableStructVector container;
+
+ Mode mode = Mode.INIT;
+ private final String name;
+ private final boolean unionEnabled;
+ private final NullableStructWriterFactory nullableStructWriterFactory;
+
+ private enum Mode { INIT, STRUCT, LIST }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the writer (for tracking).
+ * @param container A container for the data field to be written.
+ * @param unionEnabled Unused.
+ * @param caseSensitive Whether field names are case sensitive (if false field names will be lowercase.
+ */
+ public ComplexWriterImpl(
+ String name,
+ NonNullableStructVector container,
+ boolean unionEnabled,
+ boolean caseSensitive) {
+ this.name = name;
+ this.container = container;
+ this.unionEnabled = unionEnabled;
+ nullableStructWriterFactory = caseSensitive ?
+ NullableStructWriterFactory.getNullableCaseSensitiveStructWriterFactoryInstance() :
+ NullableStructWriterFactory.getNullableStructWriterFactoryInstance();
+ }
+
+ public ComplexWriterImpl(String name, NonNullableStructVector container, boolean unionEnabled) {
+ this(name, container, unionEnabled, false);
+ }
+
+ public ComplexWriterImpl(String name, NonNullableStructVector container) {
+ this(name, container, false);
+ }
+
+ @Override
+ public Field getField() {
+ return container.getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return container.getValueCapacity();
+ }
+
+ private void check(Mode... modes) {
+ StateTool.check(mode, modes);
+ }
+
+ @Override
+ public void reset() {
+ setPosition(0);
+ }
+
+ @Override
+ public void close() throws Exception {
+ clear();
+ structRoot.close();
+ if (listRoot != null) {
+ listRoot.close();
+ }
+ }
+
+ @Override
+ public void clear() {
+ switch (mode) {
+ case STRUCT:
+ structRoot.clear();
+ break;
+ case LIST:
+ listRoot.clear();
+ break;
+ default:
+ break;
+ }
+ }
+
+ @Override
+ public void setValueCount(int count) {
+ switch (mode) {
+ case STRUCT:
+ structRoot.setValueCount(count);
+ break;
+ case LIST:
+ listRoot.setValueCount(count);
+ break;
+ default:
+ break;
+ }
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ switch (mode) {
+ case STRUCT:
+ structRoot.setPosition(index);
+ break;
+ case LIST:
+ listRoot.setPosition(index);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * Returns a StructWriter, initializing it necessary from the constructor this instance
+ * was constructed with.
+ */
+ public StructWriter directStruct() {
+ Preconditions.checkArgument(name == null);
+
+ switch (mode) {
+
+ case INIT:
+ structRoot = nullableStructWriterFactory.build((StructVector) container);
+ structRoot.setPosition(idx());
+ mode = Mode.STRUCT;
+ break;
+
+ case STRUCT:
+ break;
+
+ default:
+ check(Mode.INIT, Mode.STRUCT);
+ }
+
+ return structRoot;
+ }
+
+ @Override
+ public StructWriter rootAsStruct() {
+ switch (mode) {
+
+ case INIT:
+ // TODO allow dictionaries in complex types
+ StructVector struct = container.addOrGetStruct(name);
+ structRoot = nullableStructWriterFactory.build(struct);
+ structRoot.setPosition(idx());
+ mode = Mode.STRUCT;
+ break;
+
+ case STRUCT:
+ break;
+
+ default:
+ check(Mode.INIT, Mode.STRUCT);
+ }
+
+ return structRoot;
+ }
+
+ @Override
+ public void allocate() {
+ if (structRoot != null) {
+ structRoot.allocate();
+ } else if (listRoot != null) {
+ listRoot.allocate();
+ }
+ }
+
+ @Override
+ public ListWriter rootAsList() {
+ switch (mode) {
+
+ case INIT:
+ int vectorCount = container.size();
+ // TODO allow dictionaries in complex types
+ ListVector listVector = container.addOrGetList(name);
+ if (container.size() > vectorCount) {
+ listVector.allocateNew();
+ }
+ listRoot = new UnionListWriter(listVector, nullableStructWriterFactory);
+ listRoot.setPosition(idx());
+ mode = Mode.LIST;
+ break;
+
+ case LIST:
+ break;
+
+ default:
+ check(Mode.INIT, Mode.STRUCT);
+ }
+
+ return listRoot;
+ }
+
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java
new file mode 100644
index 000000000..5c098f627
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * An {@link org.apache.arrow.vector.complex.reader.FieldReader} for
+ * reading nullable struct vectors.
+ */
+public class NullableStructReaderImpl extends SingleStructReaderImpl {
+
+ private StructVector nullableStructVector;
+
+ public NullableStructReaderImpl(NonNullableStructVector vector) {
+ super(vector);
+ this.nullableStructVector = (StructVector) vector;
+ }
+
+ @Override
+ public Field getField() {
+ return nullableStructVector.getField();
+ }
+
+ @Override
+ public void copyAsValue(StructWriter writer) {
+ NullableStructWriter impl = (NullableStructWriter) writer;
+ impl.container.copyFromSafe(idx(), impl.idx(), nullableStructVector);
+ }
+
+ @Override
+ public void copyAsField(String name, StructWriter writer) {
+ NullableStructWriter impl = (NullableStructWriter) writer.struct(name);
+ impl.container.copyFromSafe(idx(), impl.idx(), nullableStructVector);
+ }
+
+ @Override
+ public boolean isSet() {
+ return !nullableStructVector.isNull(idx());
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java
new file mode 100644
index 000000000..458aa7b61
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.StructVector;
+
+/**
+ * A factory for {@link NullableStructWriter} instances. The factory allows for configuring if field
+ * names should be considered case sensitive.
+ */
+public class NullableStructWriterFactory {
+ private final boolean caseSensitive;
+ private static final NullableStructWriterFactory nullableStructWriterFactory =
+ new NullableStructWriterFactory(false);
+ private static final NullableStructWriterFactory nullableCaseSensitiveWriterFactory =
+ new NullableStructWriterFactory(true);
+
+ public NullableStructWriterFactory(boolean caseSensitive) {
+ this.caseSensitive = caseSensitive;
+ }
+
+ public NullableStructWriter build(StructVector container) {
+ return this.caseSensitive ? new NullableCaseSensitiveStructWriter(container) : new NullableStructWriter(container);
+ }
+
+ public static NullableStructWriterFactory getNullableStructWriterFactoryInstance() {
+ return nullableStructWriterFactory;
+ }
+
+ public static NullableStructWriterFactory getNullableCaseSensitiveStructWriterFactoryInstance() {
+ return nullableCaseSensitiveWriterFactory;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
new file mode 100644
index 000000000..06b064fda
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
@@ -0,0 +1,398 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import java.math.BigDecimal;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.AbstractStructVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.DecimalHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * This FieldWriter implementation delegates all FieldWriter API calls to an inner FieldWriter. This inner field writer
+ * can start as a specific type, and this class will promote the writer to a UnionWriter if a call is made that the
+ * specifically typed writer cannot handle. A new UnionVector is created, wrapping the original vector, and replaces the
+ * original vector in the parent vector, which can be either an AbstractStructVector or a ListVector.
+ *
+ * <p>The writer used can either be for single elements (struct) or lists.</p>
+ */
+public class PromotableWriter extends AbstractPromotableFieldWriter {
+
+ private final AbstractStructVector parentContainer;
+ private final ListVector listVector;
+ private final FixedSizeListVector fixedListVector;
+ private final LargeListVector largeListVector;
+ private final NullableStructWriterFactory nullableStructWriterFactory;
+ private int position;
+ private static final int MAX_DECIMAL_PRECISION = 38;
+ private static final int MAX_DECIMAL256_PRECISION = 76;
+
+ private enum State {
+ UNTYPED, SINGLE, UNION
+ }
+
+ private MinorType type;
+ private ValueVector vector;
+ private UnionVector unionVector;
+ private State state;
+ private FieldWriter writer;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to write.
+ * @param parentContainer The parent container for the vector.
+ */
+ public PromotableWriter(ValueVector v, AbstractStructVector parentContainer) {
+ this(v, parentContainer, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param parentContainer The parent container for the vector.
+ * @param nullableStructWriterFactory The factory to create the delegate writer.
+ */
+ public PromotableWriter(
+ ValueVector v,
+ AbstractStructVector parentContainer,
+ NullableStructWriterFactory nullableStructWriterFactory) {
+ this.parentContainer = parentContainer;
+ this.listVector = null;
+ this.fixedListVector = null;
+ this.largeListVector = null;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ init(v);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param listVector The vector that serves as a parent of v.
+ */
+ public PromotableWriter(ValueVector v, ListVector listVector) {
+ this(v, listVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param fixedListVector The vector that serves as a parent of v.
+ */
+ public PromotableWriter(ValueVector v, FixedSizeListVector fixedListVector) {
+ this(v, fixedListVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param largeListVector The vector that serves as a parent of v.
+ */
+ public PromotableWriter(ValueVector v, LargeListVector largeListVector) {
+ this(v, largeListVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param listVector The vector that serves as a parent of v.
+ * @param nullableStructWriterFactory The factory to create the delegate writer.
+ */
+ public PromotableWriter(
+ ValueVector v,
+ ListVector listVector,
+ NullableStructWriterFactory nullableStructWriterFactory) {
+ this.listVector = listVector;
+ this.parentContainer = null;
+ this.fixedListVector = null;
+ this.largeListVector = null;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ init(v);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param fixedListVector The vector that serves as a parent of v.
+ * @param nullableStructWriterFactory The factory to create the delegate writer.
+ */
+ public PromotableWriter(
+ ValueVector v,
+ FixedSizeListVector fixedListVector,
+ NullableStructWriterFactory nullableStructWriterFactory) {
+ this.fixedListVector = fixedListVector;
+ this.parentContainer = null;
+ this.listVector = null;
+ this.largeListVector = null;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ init(v);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param largeListVector The vector that serves as a parent of v.
+ * @param nullableStructWriterFactory The factory to create the delegate writer.
+ */
+ public PromotableWriter(
+ ValueVector v,
+ LargeListVector largeListVector,
+ NullableStructWriterFactory nullableStructWriterFactory) {
+ this.largeListVector = largeListVector;
+ this.fixedListVector = null;
+ this.parentContainer = null;
+ this.listVector = null;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ init(v);
+ }
+
+ private void init(ValueVector v) {
+ if (v instanceof UnionVector) {
+ state = State.UNION;
+ unionVector = (UnionVector) v;
+ writer = new UnionWriter(unionVector, nullableStructWriterFactory);
+ } else if (v instanceof NullVector) {
+ state = State.UNTYPED;
+ } else {
+ setWriter(v);
+ }
+ }
+
+ @Override
+ public void setAddVectorAsNullable(boolean nullable) {
+ super.setAddVectorAsNullable(nullable);
+ if (writer instanceof AbstractFieldWriter) {
+ ((AbstractFieldWriter) writer).setAddVectorAsNullable(nullable);
+ }
+ }
+
+ private void setWriter(ValueVector v) {
+ state = State.SINGLE;
+ vector = v;
+ type = v.getMinorType();
+ switch (type) {
+ case STRUCT:
+ writer = nullableStructWriterFactory.build((StructVector) vector);
+ break;
+ case LIST:
+ writer = new UnionListWriter((ListVector) vector, nullableStructWriterFactory);
+ break;
+ case MAP:
+ writer = new UnionMapWriter((MapVector) vector);
+ break;
+ case UNION:
+ writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory);
+ break;
+ default:
+ writer = type.getNewFieldWriter(vector);
+ break;
+ }
+ }
+
+ @Override
+ public void writeNull() {
+ FieldWriter w = getWriter();
+ if (w != null) {
+ w.writeNull();
+ }
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ FieldWriter w = getWriter();
+ if (w == null) {
+ position = index;
+ } else {
+ w.setPosition(index);
+ }
+ }
+
+ @Override
+ protected FieldWriter getWriter(MinorType type, ArrowType arrowType) {
+ if (state == State.UNION) {
+ if (type == MinorType.DECIMAL || type == MinorType.MAP) {
+ ((UnionWriter) writer).getWriter(type, arrowType);
+ } else {
+ ((UnionWriter) writer).getWriter(type);
+ }
+ } else if (state == State.UNTYPED) {
+ if (type == null) {
+ // ???
+ return null;
+ }
+ if (arrowType == null) {
+ arrowType = type.getType();
+ }
+ FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null);
+ ValueVector v;
+ if (listVector != null) {
+ v = listVector.addOrGetVector(fieldType).getVector();
+ } else if (fixedListVector != null) {
+ v = fixedListVector.addOrGetVector(fieldType).getVector();
+ } else {
+ v = largeListVector.addOrGetVector(fieldType).getVector();
+ }
+ v.allocateNew();
+ setWriter(v);
+ writer.setPosition(position);
+ } else if (type != this.type) {
+ promoteToUnion();
+ if (type == MinorType.DECIMAL || type == MinorType.MAP) {
+ ((UnionWriter) writer).getWriter(type, arrowType);
+ } else {
+ ((UnionWriter) writer).getWriter(type);
+ }
+ }
+ return writer;
+ }
+
+ @Override
+ public boolean isEmptyStruct() {
+ return writer.isEmptyStruct();
+ }
+
+ protected FieldWriter getWriter() {
+ return writer;
+ }
+
+ private FieldWriter promoteToUnion() {
+ String name = vector.getField().getName();
+ TransferPair tp = vector.getTransferPair(vector.getMinorType().name().toLowerCase(), vector.getAllocator());
+ tp.transfer();
+ if (parentContainer != null) {
+ // TODO allow dictionaries in complex types
+ unionVector = parentContainer.addOrGetUnion(name);
+ unionVector.allocateNew();
+ } else if (listVector != null) {
+ unionVector = listVector.promoteToUnion();
+ } else if (fixedListVector != null) {
+ unionVector = fixedListVector.promoteToUnion();
+ } else if (largeListVector != null) {
+ unionVector = largeListVector.promoteToUnion();
+ }
+ unionVector.addVector((FieldVector) tp.getTo());
+ writer = new UnionWriter(unionVector, nullableStructWriterFactory);
+ writer.setPosition(idx());
+ for (int i = 0; i <= idx(); i++) {
+ unionVector.setType(i, vector.getMinorType());
+ }
+ vector = null;
+ state = State.UNION;
+ return writer;
+ }
+
+ @Override
+ public void write(DecimalHolder holder) {
+ getWriter(MinorType.DECIMAL,
+ new ArrowType.Decimal(MAX_DECIMAL_PRECISION, holder.scale, /*bitWidth=*/128)).write(holder);
+ }
+
+ @Override
+ public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL, new ArrowType.Decimal(MAX_DECIMAL_PRECISION,
+ ((ArrowType.Decimal) arrowType).getScale(), /*bitWidth=*/128)).writeDecimal(start, buffer, arrowType);
+ }
+
+ @Override
+ public void writeDecimal(BigDecimal value) {
+ getWriter(MinorType.DECIMAL,
+ new ArrowType.Decimal(MAX_DECIMAL_PRECISION, value.scale(), /*bitWidth=*/128)).writeDecimal(value);
+ }
+
+ @Override
+ public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL, new ArrowType.Decimal(MAX_DECIMAL_PRECISION,
+ ((ArrowType.Decimal) arrowType).getScale(), /*bitWidth=*/128)).writeBigEndianBytesToDecimal(value, arrowType);
+ }
+
+ @Override
+ public void write(Decimal256Holder holder) {
+ getWriter(MinorType.DECIMAL256,
+ new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, holder.scale, /*bitWidth=*/256)).write(holder);
+ }
+
+ @Override
+ public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL256, new ArrowType.Decimal(MAX_DECIMAL256_PRECISION,
+ ((ArrowType.Decimal) arrowType).getScale(), /*bitWidth=*/256)).writeDecimal256(start, buffer, arrowType);
+ }
+
+ @Override
+ public void writeDecimal256(BigDecimal value) {
+ getWriter(MinorType.DECIMAL256,
+ new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, value.scale(), /*bitWidth=*/256)).writeDecimal256(value);
+ }
+
+ @Override
+ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL256, new ArrowType.Decimal(MAX_DECIMAL256_PRECISION,
+ ((ArrowType.Decimal) arrowType).getScale(),
+ /*bitWidth=*/256)).writeBigEndianBytesToDecimal256(value, arrowType);
+ }
+
+
+ @Override
+ public void allocate() {
+ getWriter().allocate();
+ }
+
+ @Override
+ public void clear() {
+ getWriter().clear();
+ }
+
+ @Override
+ public Field getField() {
+ return getWriter().getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return getWriter().getValueCapacity();
+ }
+
+ @Override
+ public void close() throws Exception {
+ getWriter().close();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java
new file mode 100644
index 000000000..9bbe60421
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+
+import org.apache.arrow.vector.complex.AbstractContainerVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/**
+ * An implementation of {@link AbstractFieldReader} for lists vectors.
+ */
+@SuppressWarnings("unused")
+public class SingleListReaderImpl extends AbstractFieldReader {
+
+ private final String name;
+ private final AbstractContainerVector container;
+ private FieldReader reader;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of field to read in container.
+ * @param container The container holding a list.
+ */
+ public SingleListReaderImpl(String name, AbstractContainerVector container) {
+ super();
+ this.name = name;
+ this.container = container;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ if (reader != null) {
+ reader.setPosition(index);
+ }
+ }
+
+ @Override
+ public Object readObject() {
+ return reader.readObject();
+ }
+
+ @Override
+ public FieldReader reader() {
+ if (reader == null) {
+ reader = container.getChild(name).getReader();
+ setPosition(idx());
+ }
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LIST;
+ }
+
+ @Override
+ public boolean isSet() {
+ return false;
+ }
+
+ @Override
+ public void copyAsValue(ListWriter writer) {
+ throw new UnsupportedOperationException("Generic list copying not yet supported. Please resolve to typed list.");
+ }
+
+ @Override
+ public void copyAsField(String name, StructWriter writer) {
+ throw new UnsupportedOperationException("Generic list copying not yet supported. Please resolve to typed list.");
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java
new file mode 100644
index 000000000..3590e40ce
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * {@link FieldReader} for a single {@link org.apache.arrow.vector.complex.NonNullableStructVector}.
+ */
+@SuppressWarnings("unused")
+public class SingleStructReaderImpl extends AbstractFieldReader {
+
+ private final NonNullableStructVector vector;
+ private final Map<String, FieldReader> fields = new HashMap<>();
+
+ public SingleStructReaderImpl(NonNullableStructVector vector) {
+ this.vector = vector;
+ }
+
+ private void setChildrenPosition(int index) {
+ for (FieldReader r : fields.values()) {
+ r.setPosition(index);
+ }
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ @Override
+ public FieldReader reader(String name) {
+ FieldReader reader = fields.get(name);
+ if (reader == null) {
+ ValueVector child = vector.getChild(name);
+ if (child == null) {
+ reader = NullReader.INSTANCE;
+ } else {
+ reader = child.getReader();
+ }
+ fields.put(name, reader);
+ reader.setPosition(idx());
+ }
+ return reader;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for (FieldReader r : fields.values()) {
+ r.setPosition(index);
+ }
+ }
+
+ @Override
+ public Object readObject() {
+ return vector.getObject(idx());
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.STRUCT;
+ }
+
+ @Override
+ public boolean isSet() {
+ return true;
+ }
+
+ @Override
+ public java.util.Iterator<String> iterator() {
+ return vector.fieldNameIterator();
+ }
+
+ @Override
+ public void copyAsValue(StructWriter writer) {
+ SingleStructWriter impl = (SingleStructWriter) writer;
+ impl.container.copyFromSafe(idx(), impl.idx(), vector);
+ }
+
+ @Override
+ public void copyAsField(String name, StructWriter writer) {
+ SingleStructWriter impl = (SingleStructWriter) writer.struct(name);
+ impl.container.copyFromSafe(idx(), impl.idx(), vector);
+ }
+
+
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java
new file mode 100644
index 000000000..e9c0825dd
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.writer.BaseWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructOrListWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.BitWriter;
+import org.apache.arrow.vector.complex.writer.Float4Writer;
+import org.apache.arrow.vector.complex.writer.Float8Writer;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.complex.writer.VarBinaryWriter;
+import org.apache.arrow.vector.complex.writer.VarCharWriter;
+
+/**
+ * Concrete implementation of {@link StructOrListWriter}.
+ */
+public class StructOrListWriterImpl implements StructOrListWriter {
+
+ public final BaseWriter.StructWriter struct;
+ public final BaseWriter.ListWriter list;
+
+ /**
+ * Constructs a new instance using a {@link BaseWriter.StructWriter}
+ * (instead of an {@link BaseWriter.ListWriter}).
+ */
+ public StructOrListWriterImpl(final BaseWriter.StructWriter writer) {
+ this.struct = writer;
+ this.list = null;
+ }
+
+ /**
+ * Constructs a new instance using a {@link BaseWriter.ListWriter}
+ * (instead of a {@link BaseWriter.StructWriter}).
+ */
+ public StructOrListWriterImpl(final BaseWriter.ListWriter writer) {
+ this.struct = null;
+ this.list = writer;
+ }
+
+ /**
+ * Start writing to either the list or the struct.
+ */
+ public void start() {
+ if (struct != null) {
+ struct.start();
+ } else {
+ list.startList();
+ }
+ }
+
+ /**
+ * Finish writing to the list or struct.
+ */
+ public void end() {
+ if (struct != null) {
+ struct.end();
+ } else {
+ list.endList();
+ }
+ }
+
+ /**
+ * Creates a new writer for a struct with the given name.
+ */
+ public StructOrListWriter struct(final String name) {
+ assert struct != null;
+ return new StructOrListWriterImpl(struct.struct(name));
+ }
+
+ /**
+ * Creates a new writer for a list of structs.
+ *
+ * @param name Unused.
+ */
+ public StructOrListWriter listoftstruct(final String name) {
+ assert list != null;
+ return new StructOrListWriterImpl(list.struct());
+ }
+
+ public StructOrListWriter list(final String name) {
+ assert struct != null;
+ return new StructOrListWriterImpl(struct.list(name));
+ }
+
+ public boolean isStructWriter() {
+ return struct != null;
+ }
+
+ public boolean isListWriter() {
+ return list != null;
+ }
+
+ public VarCharWriter varChar(final String name) {
+ return (struct != null) ? struct.varChar(name) : list.varChar();
+ }
+
+ public IntWriter integer(final String name) {
+ return (struct != null) ? struct.integer(name) : list.integer();
+ }
+
+ public BigIntWriter bigInt(final String name) {
+ return (struct != null) ? struct.bigInt(name) : list.bigInt();
+ }
+
+ public Float4Writer float4(final String name) {
+ return (struct != null) ? struct.float4(name) : list.float4();
+ }
+
+ public Float8Writer float8(final String name) {
+ return (struct != null) ? struct.float8(name) : list.float8();
+ }
+
+ public BitWriter bit(final String name) {
+ return (struct != null) ? struct.bit(name) : list.bit();
+ }
+
+ public VarBinaryWriter binary(final String name) {
+ return (struct != null) ? struct.varBinary(name) : list.varBinary();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java
new file mode 100644
index 000000000..ece729ae5
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.UnionHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/**
+ * Reader for fixed size list vectors.
+ */
+public class UnionFixedSizeListReader extends AbstractFieldReader {
+
+ private final FixedSizeListVector vector;
+ private final ValueVector data;
+ private final int listSize;
+
+ private int currentOffset;
+
+ /**
+ * Constructs a new instance that reads data in <code>vector</code>.
+ */
+ public UnionFixedSizeListReader(FixedSizeListVector vector) {
+ this.vector = vector;
+ this.data = vector.getDataVector();
+ this.listSize = vector.getListSize();
+ }
+
+ @Override
+ public boolean isSet() {
+ return !vector.isNull(idx());
+ }
+
+ @Override
+ public FieldReader reader() {
+ return data.getReader();
+ }
+
+ @Override
+ public Object readObject() {
+ return vector.getObject(idx());
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return vector.getMinorType();
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ data.getReader().setPosition(index * listSize);
+ currentOffset = 0;
+ }
+
+ @Override
+ public void read(int index, UnionHolder holder) {
+ setPosition(idx());
+ for (int i = -1; i < index; i++) {
+ if (!next()) {
+ throw new IndexOutOfBoundsException("Requested " + index + ", size " + listSize);
+ }
+ }
+ holder.reader = data.getReader();
+ holder.isSet = vector.isNull(idx()) ? 0 : 1;
+ }
+
+ @Override
+ public int size() {
+ return listSize;
+ }
+
+ @Override
+ public boolean next() {
+ if (currentOffset < listSize) {
+ data.getReader().setPosition(idx() * listSize + currentOffset++);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java
new file mode 100644
index 000000000..faf088b55
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.UnionHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * {@link FieldReader} for list of union types.
+ */
+public class UnionLargeListReader extends AbstractFieldReader {
+
+ private LargeListVector vector;
+ private ValueVector data;
+ private long index;
+ private static final long OFFSET_WIDTH = 8L;
+
+ public UnionLargeListReader(LargeListVector vector) {
+ this.vector = vector;
+ this.data = vector.getDataVector();
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ @Override
+ public boolean isSet() {
+ return !vector.isNull(idx());
+ }
+
+ private long currentOffset;
+ private long maxOffset;
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ currentOffset = vector.getOffsetBuffer().getLong((long) index * OFFSET_WIDTH) - 1;
+ maxOffset = vector.getOffsetBuffer().getLong(((long) index + 1L) * OFFSET_WIDTH);
+ }
+
+ @Override
+ public FieldReader reader() {
+ return data.getReader();
+ }
+
+ @Override
+ public Object readObject() {
+ return vector.getObject(idx());
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LARGELIST;
+ }
+
+ @Override
+ public void read(int index, UnionHolder holder) {
+ setPosition(index);
+ for (int i = -1; i < index; i++) {
+ next();
+ }
+ holder.reader = data.getReader();
+ holder.isSet = data.getReader().isSet() ? 1 : 0;
+ }
+
+ @Override
+ public int size() {
+ int size = checkedCastToInt(maxOffset - currentOffset - 1); //todo revisit when int64 vectors are done
+ return size < 0 ? 0 : size;
+ }
+
+ @Override
+ public boolean next() {
+ if (currentOffset + 1 < maxOffset) {
+ data.getReader().setPosition(checkedCastToInt(++currentOffset)); // todo revisit when int64 vectors are done
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public void copyAsValue(UnionLargeListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java
new file mode 100644
index 000000000..a8c185aef
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.UnionHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * {@link FieldReader} for list of union types.
+ */
+public class UnionListReader extends AbstractFieldReader {
+
+ private ListVector vector;
+ private ValueVector data;
+ private static final int OFFSET_WIDTH = 4;
+
+ public UnionListReader(ListVector vector) {
+ this.vector = vector;
+ this.data = vector.getDataVector();
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ @Override
+ public boolean isSet() {
+ return !vector.isNull(idx());
+ }
+
+ private int currentOffset;
+ private int maxOffset;
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ currentOffset = vector.getOffsetBuffer().getInt(index * OFFSET_WIDTH) - 1;
+ maxOffset = vector.getOffsetBuffer().getInt((index + 1) * OFFSET_WIDTH);
+ }
+
+ @Override
+ public FieldReader reader() {
+ return data.getReader();
+ }
+
+ @Override
+ public Object readObject() {
+ return vector.getObject(idx());
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LIST;
+ }
+
+ @Override
+ public void read(int index, UnionHolder holder) {
+ setPosition(idx());
+ for (int i = -1; i < index; i++) {
+ next();
+ }
+ holder.reader = data.getReader();
+ holder.isSet = data.getReader().isSet() ? 1 : 0;
+ }
+
+ @Override
+ public int size() {
+ int size = maxOffset - currentOffset - 1;
+ return size < 0 ? 0 : size;
+ }
+
+ @Override
+ public boolean next() {
+ if (currentOffset + 1 < maxOffset) {
+ data.getReader().setPosition(++currentOffset);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java
new file mode 100644
index 000000000..7a1bdce9b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/**
+ * Reader for a MapVector.
+ */
+public class UnionMapReader extends UnionListReader {
+
+ private String keyName = MapVector.KEY_NAME;
+ private String valueName = MapVector.VALUE_NAME;
+
+ /**
+ * Construct a new reader for the given vector.
+ *
+ * @param vector Vector to read from.
+ */
+ public UnionMapReader(MapVector vector) {
+ super(vector);
+ }
+
+ /**
+ * Set the key, value field names to read.
+ *
+ * @param key Field name for key.
+ * @param value Field name for value.
+ */
+ public void setKeyValueNames(String key, String value) {
+ keyName = key;
+ valueName = value;
+ }
+
+ /**
+ * Start reading a key from the map entry.
+ *
+ * @return reader that can be used to read the key.
+ */
+ public FieldReader key() {
+ return reader().reader(keyName);
+ }
+
+ /**
+ * Start reading a value element from the map entry.
+ *
+ * @return reader that can be used to read the value.
+ */
+ public FieldReader value() {
+ return reader().reader(valueName);
+ }
+
+ /**
+ * Return the MinorType of the reader as MAP.
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.MAP;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java
new file mode 100644
index 000000000..a888abbaa
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.reader;
+
+import org.apache.arrow.vector.complex.reader.BaseReader.ListReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.MapReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedListReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedMapReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedStructReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.ScalarReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.StructReader;
+
+
+/**
+ * Composite of all Reader types (e.g. {@link StructReader}, {@link ScalarReader}, etc). Each reader type
+ * is in essence a way of iterating over a {@link org.apache.arrow.vector.ValueVector}.
+ */
+public interface FieldReader extends StructReader, ListReader, MapReader, ScalarReader,
+ RepeatedStructReader, RepeatedListReader, RepeatedMapReader {
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java
new file mode 100644
index 000000000..a3cb7108a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.writer;
+
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ScalarWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+
+/**
+ * Composite of all writer types. Writers are convenience classes for incrementally
+ * adding values to {@linkplain org.apache.arrow.vector.ValueVector}s.
+ */
+public interface FieldWriter extends StructWriter, ListWriter, MapWriter, ScalarWriter {
+ void allocate();
+
+ void clear();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java
new file mode 100644
index 000000000..39b32968d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compression;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.MemoryUtil;
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * The base class for concrete compression codecs, providing
+ * common logic for all compression codecs.
+ */
+public abstract class AbstractCompressionCodec implements CompressionCodec {
+
+ @Override
+ public ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
+ if (uncompressedBuffer.writerIndex() == 0L) {
+ // shortcut for empty buffer
+ ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
+ compressedBuffer.setLong(0, 0);
+ compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
+ uncompressedBuffer.close();
+ return compressedBuffer;
+ }
+
+ ArrowBuf compressedBuffer = doCompress(allocator, uncompressedBuffer);
+ long compressedLength = compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH;
+ long uncompressedLength = uncompressedBuffer.writerIndex();
+
+ if (compressedLength > uncompressedLength) {
+ // compressed buffer is larger, send the raw buffer
+ compressedBuffer.close();
+ compressedBuffer = CompressionUtil.packageRawBuffer(allocator, uncompressedBuffer);
+ } else {
+ writeUncompressedLength(compressedBuffer, uncompressedLength);
+ }
+
+ uncompressedBuffer.close();
+ return compressedBuffer;
+ }
+
+ @Override
+ public ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
+ Preconditions.checkArgument(compressedBuffer.writerIndex() >= CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH,
+ "Not enough data to decompress.");
+
+ long decompressedLength = readUncompressedLength(compressedBuffer);
+
+ if (decompressedLength == 0L) {
+ // shortcut for empty buffer
+ compressedBuffer.close();
+ return allocator.getEmpty();
+ }
+
+ if (decompressedLength == CompressionUtil.NO_COMPRESSION_LENGTH) {
+ // no compression
+ return CompressionUtil.extractUncompressedBuffer(compressedBuffer);
+ }
+
+ ArrowBuf decompressedBuffer = doDecompress(allocator, compressedBuffer);
+ compressedBuffer.close();
+ return decompressedBuffer;
+ }
+
+ protected void writeUncompressedLength(ArrowBuf compressedBuffer, long uncompressedLength) {
+ if (!MemoryUtil.LITTLE_ENDIAN) {
+ uncompressedLength = Long.reverseBytes(uncompressedLength);
+ }
+ // first 8 bytes reserved for uncompressed length, according to the specification
+ compressedBuffer.setLong(0, uncompressedLength);
+ }
+
+ protected long readUncompressedLength(ArrowBuf compressedBuffer) {
+ long decompressedLength = compressedBuffer.getLong(0);
+ if (!MemoryUtil.LITTLE_ENDIAN) {
+ decompressedLength = Long.reverseBytes(decompressedLength);
+ }
+ return decompressedLength;
+ }
+
+ /**
+ * The method that actually performs the data compression.
+ * The layout of the returned compressed buffer is the compressed data,
+ * plus 8 bytes reserved at the beginning of the buffer for the uncompressed data size.
+ * <p>
+ * Please note that this method is not responsible for releasing the uncompressed buffer.
+ * </p>
+ */
+ protected abstract ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer);
+
+ /**
+ * The method that actually performs the data decompression.
+ * The layout of the compressed buffer is the compressed data,
+ * plus 8 bytes at the beginning of the buffer storing the uncompressed data size.
+ * <p>
+ * Please note that this method is not responsible for releasing the compressed buffer.
+ * </p>
+ */
+ protected abstract ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java
new file mode 100644
index 000000000..a6dd8b51f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compression;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+
+/**
+ * The codec for compression/decompression.
+ */
+public interface CompressionCodec {
+
+ /**
+ * Compress a buffer.
+ * @param allocator the allocator for allocating memory for compressed buffer.
+ * @param uncompressedBuffer the buffer to compress.
+ * Implementation of this method should take care of releasing this buffer.
+ * @return the compressed buffer
+ */
+ ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer);
+
+ /**
+ * Decompress a buffer.
+ * @param allocator the allocator for allocating memory for decompressed buffer.
+ * @param compressedBuffer the buffer to be decompressed.
+ * Implementation of this method should take care of releasing this buffer.
+ * @return the decompressed buffer.
+ */
+ ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer);
+
+ /**
+ * Gets the type of the codec.
+ * @return the type of the codec.
+ */
+ CompressionUtil.CodecType getCodecType();
+
+ /**
+ * Factory to create compression codec.
+ */
+ interface Factory {
+
+ /**
+ * Creates the codec based on the codec type.
+ */
+ CompressionCodec createCodec(CompressionUtil.CodecType codecType);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java
new file mode 100644
index 000000000..1deb38c84
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compression;
+
+import org.apache.arrow.flatbuf.BodyCompressionMethod;
+import org.apache.arrow.flatbuf.CompressionType;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowBodyCompression;
+
+/**
+ * Utilities for data compression/decompression.
+ */
+public class CompressionUtil {
+
+ /**
+ * Compression codec types corresponding to flat buffer implementation in {@link CompressionType}.
+ */
+ public enum CodecType {
+
+ NO_COMPRESSION(NoCompressionCodec.COMPRESSION_TYPE),
+
+ LZ4_FRAME(org.apache.arrow.flatbuf.CompressionType.LZ4_FRAME),
+
+ ZSTD(org.apache.arrow.flatbuf.CompressionType.ZSTD);
+
+ private final byte type;
+
+ CodecType(byte type) {
+ this.type = type;
+ }
+
+ public byte getType() {
+ return type;
+ }
+
+ /**
+ * Gets the codec type from the compression type defined in {@link CompressionType}.
+ */
+ public static CodecType fromCompressionType(byte type) {
+ for (CodecType codecType : values()) {
+ if (codecType.type == type) {
+ return codecType;
+ }
+ }
+ return NO_COMPRESSION;
+ }
+ }
+
+ public static final long SIZE_OF_UNCOMPRESSED_LENGTH = 8L;
+
+ /**
+ * Special flag to indicate no compression.
+ * (e.g. when the compressed buffer has a larger size.)
+ */
+ public static final long NO_COMPRESSION_LENGTH = -1L;
+
+ private CompressionUtil() {
+ }
+
+ /**
+ * Creates the {@link ArrowBodyCompression} object, given the {@link CompressionCodec}.
+ * The implementation of this method should depend on the values of
+ * {@link org.apache.arrow.flatbuf.CompressionType#names}.
+ */
+ public static ArrowBodyCompression createBodyCompression(CompressionCodec codec) {
+ return new ArrowBodyCompression(codec.getCodecType().getType(), BodyCompressionMethod.BUFFER);
+ }
+
+ /**
+ * Process compression by compressing the buffer as is.
+ */
+ public static ArrowBuf packageRawBuffer(BufferAllocator allocator, ArrowBuf inputBuffer) {
+ ArrowBuf compressedBuffer = allocator.buffer(SIZE_OF_UNCOMPRESSED_LENGTH + inputBuffer.writerIndex());
+ compressedBuffer.setLong(0, NO_COMPRESSION_LENGTH);
+ compressedBuffer.setBytes(SIZE_OF_UNCOMPRESSED_LENGTH, inputBuffer, 0, inputBuffer.writerIndex());
+ compressedBuffer.writerIndex(SIZE_OF_UNCOMPRESSED_LENGTH + inputBuffer.writerIndex());
+ return compressedBuffer;
+ }
+
+ /**
+ * Process decompression by slicing the buffer that contains the uncompressed bytes.
+ */
+ public static ArrowBuf extractUncompressedBuffer(ArrowBuf inputBuffer) {
+ return inputBuffer.slice(SIZE_OF_UNCOMPRESSED_LENGTH,
+ inputBuffer.writerIndex() - SIZE_OF_UNCOMPRESSED_LENGTH);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java
new file mode 100644
index 000000000..e5e8e9d46
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compression;
+
+import org.apache.arrow.flatbuf.BodyCompressionMethod;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowBodyCompression;
+
+/**
+ * The default compression codec that does no compression.
+ */
+public class NoCompressionCodec implements CompressionCodec {
+
+ public static final NoCompressionCodec INSTANCE = new NoCompressionCodec();
+
+ public static final byte COMPRESSION_TYPE = -1;
+
+ public static final ArrowBodyCompression DEFAULT_BODY_COMPRESSION =
+ new ArrowBodyCompression(COMPRESSION_TYPE, BodyCompressionMethod.BUFFER);
+
+ private NoCompressionCodec() {
+ }
+
+ @Override
+ public ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
+ return uncompressedBuffer;
+ }
+
+ @Override
+ public ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
+ return compressedBuffer;
+ }
+
+ @Override
+ public CompressionUtil.CodecType getCodecType() {
+ return CompressionUtil.CodecType.NO_COMPRESSION;
+ }
+
+ /**
+ * The default factory that creates a {@link NoCompressionCodec}.
+ */
+ public static class Factory implements CompressionCodec.Factory {
+
+ public static final NoCompressionCodec.Factory INSTANCE = new NoCompressionCodec.Factory();
+
+ @Override
+ public CompressionCodec createCodec(CompressionUtil.CodecType codecType) {
+ return NoCompressionCodec.INSTANCE;
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java
new file mode 100644
index 000000000..6f40e5814
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import java.util.Objects;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+
+/**
+ * A dictionary (integer to Value mapping) that is used to facilitate
+ * dictionary encoding compression.
+ */
+public class Dictionary {
+
+ private final DictionaryEncoding encoding;
+ private final FieldVector dictionary;
+
+ public Dictionary(FieldVector dictionary, DictionaryEncoding encoding) {
+ this.dictionary = dictionary;
+ this.encoding = encoding;
+ }
+
+ public FieldVector getVector() {
+ return dictionary;
+ }
+
+ public DictionaryEncoding getEncoding() {
+ return encoding;
+ }
+
+ public ArrowType getVectorType() {
+ return dictionary.getField().getType();
+ }
+
+ @Override
+ public String toString() {
+ return "Dictionary " + encoding + " " + dictionary;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ Dictionary that = (Dictionary) o;
+ return Objects.equals(encoding, that.encoding) &&
+ new VectorEqualsVisitor().vectorEquals(that.dictionary, dictionary);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(encoding, dictionary);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java
new file mode 100644
index 000000000..babb0dbd3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Encoder/decoder for Dictionary encoded {@link ValueVector}. Dictionary encoding produces an
+ * integer {@link ValueVector}. Each entry in the Vector is index into the dictionary which can hold
+ * values of any type.
+ */
+public class DictionaryEncoder {
+
+ private final DictionaryHashTable hashTable;
+ private final Dictionary dictionary;
+ private final BufferAllocator allocator;
+
+ /**
+ * Construct an instance.
+ */
+ public DictionaryEncoder(Dictionary dictionary, BufferAllocator allocator) {
+ this (dictionary, allocator, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Construct an instance.
+ */
+ public DictionaryEncoder(Dictionary dictionary, BufferAllocator allocator, ArrowBufHasher hasher) {
+ this.dictionary = dictionary;
+ this.allocator = allocator;
+ hashTable = new DictionaryHashTable(dictionary.getVector(), hasher);
+ }
+
+ /**
+ * Dictionary encodes a vector with a provided dictionary. The dictionary must contain all values in the vector.
+ *
+ * @param vector vector to encode
+ * @param dictionary dictionary used for encoding
+ * @return dictionary encoded vector
+ */
+ public static ValueVector encode(ValueVector vector, Dictionary dictionary) {
+ DictionaryEncoder encoder = new DictionaryEncoder(dictionary, vector.getAllocator());
+ return encoder.encode(vector);
+ }
+
+ /**
+ * Decodes a dictionary encoded array using the provided dictionary.
+ *
+ * @param indices dictionary encoded values, must be int type
+ * @param dictionary dictionary used to decode the values
+ * @return vector with values restored from dictionary
+ */
+ public static ValueVector decode(ValueVector indices, Dictionary dictionary) {
+ DictionaryEncoder encoder = new DictionaryEncoder(dictionary, indices.getAllocator());
+ return encoder.decode(indices);
+ }
+
+ /**
+ * Get the indexType according to the dictionary vector valueCount.
+ * @param valueCount dictionary vector valueCount.
+ * @return index type.
+ */
+ public static ArrowType.Int getIndexType(int valueCount) {
+ Preconditions.checkArgument(valueCount >= 0);
+ if (valueCount <= Byte.MAX_VALUE) {
+ return new ArrowType.Int(8, true);
+ } else if (valueCount <= Character.MAX_VALUE) {
+ return new ArrowType.Int(16, true);
+ } else if (valueCount <= Integer.MAX_VALUE) {
+ return new ArrowType.Int(32, true);
+ } else {
+ return new ArrowType.Int(64, true);
+ }
+ }
+
+ /**
+ * Populates indices between start and end with the encoded values of vector.
+ * @param vector the vector to encode
+ * @param indices the index vector
+ * @param encoding the hash table for encoding
+ * @param start the start index
+ * @param end the end index
+ */
+ static void buildIndexVector(
+ ValueVector vector,
+ BaseIntVector indices,
+ DictionaryHashTable encoding,
+ int start,
+ int end) {
+
+ for (int i = start; i < end; i++) {
+ if (!vector.isNull(i)) {
+ // if it's null leave it null
+ // note: this may fail if value was not included in the dictionary
+ int encoded = encoding.getIndex(i, vector);
+ if (encoded == -1) {
+ throw new IllegalArgumentException("Dictionary encoding not defined for value:" + vector.getObject(i));
+ }
+ indices.setWithPossibleTruncate(i, encoded);
+ }
+ }
+ }
+
+ /**
+ * Retrieve values to target vector from index vector.
+ * @param indices the index vector
+ * @param transfer the {@link TransferPair} to copy dictionary data into target vector.
+ * @param dictionaryCount the value count of dictionary vector.
+ * @param start the start index
+ * @param end the end index
+ */
+ static void retrieveIndexVector(
+ BaseIntVector indices,
+ TransferPair transfer,
+ int dictionaryCount,
+ int start,
+ int end) {
+ for (int i = start; i < end; i++) {
+ if (!indices.isNull(i)) {
+ int indexAsInt = (int) indices.getValueAsLong(i);
+ if (indexAsInt > dictionaryCount) {
+ throw new IllegalArgumentException("Provided dictionary does not contain value for index " + indexAsInt);
+ }
+ transfer.copyValueSafe(indexAsInt, i);
+ }
+ }
+ }
+
+ /**
+ * Encodes a vector with the built hash table in this encoder.
+ */
+ public ValueVector encode(ValueVector vector) {
+
+ Field valueField = vector.getField();
+ FieldType indexFieldType = new FieldType(valueField.isNullable(), dictionary.getEncoding().getIndexType(),
+ dictionary.getEncoding(), valueField.getMetadata());
+ Field indexField = new Field(valueField.getName(), indexFieldType, null);
+
+ // vector to hold our indices (dictionary encoded values)
+ FieldVector createdVector = indexField.createVector(allocator);
+ if (! (createdVector instanceof BaseIntVector)) {
+ throw new IllegalArgumentException("Dictionary encoding does not have a valid int type:" +
+ createdVector.getClass());
+ }
+
+ BaseIntVector indices = (BaseIntVector) createdVector;
+ indices.allocateNew();
+
+ buildIndexVector(vector, indices, hashTable, 0, vector.getValueCount());
+ indices.setValueCount(vector.getValueCount());
+ return indices;
+ }
+
+ /**
+ * Decodes a vector with the built hash table in this encoder.
+ */
+ public ValueVector decode(ValueVector indices) {
+ int count = indices.getValueCount();
+ ValueVector dictionaryVector = dictionary.getVector();
+ int dictionaryCount = dictionaryVector.getValueCount();
+ // copy the dictionary values into the decoded vector
+ TransferPair transfer = dictionaryVector.getTransferPair(allocator);
+ transfer.getTo().allocateNewSafe();
+
+ BaseIntVector baseIntVector = (BaseIntVector) indices;
+ retrieveIndexVector(baseIntVector, transfer, dictionaryCount, 0, count);
+ ValueVector decoded = transfer.getTo();
+ decoded.setValueCount(count);
+ return decoded;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java
new file mode 100644
index 000000000..9926a8e2a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java
@@ -0,0 +1,295 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+
+/**
+ * HashTable used for Dictionary encoding. It holds two vectors (the vector to encode and dictionary vector)
+ * It stores the index in dictionary vector and for a given index in encode vector,
+ * it could return dictionary index.
+ */
+public class DictionaryHashTable {
+
+ /**
+ * Represents a null value in map.
+ */
+ static final int NULL_VALUE = -1;
+
+ /**
+ * The default initial capacity - MUST be a power of two.
+ */
+ static final int DEFAULT_INITIAL_CAPACITY = 1 << 4;
+
+ /**
+ * The maximum capacity, used if a higher value is implicitly specified
+ * by either of the constructors with arguments.
+ */
+ static final int MAXIMUM_CAPACITY = 1 << 30;
+
+ /**
+ * The load factor used when none specified in constructor.
+ */
+ static final float DEFAULT_LOAD_FACTOR = 0.75f;
+
+ static final DictionaryHashTable.Entry[] EMPTY_TABLE = {};
+
+ /**
+ * The table, initialized on first use, and resized as
+ * necessary. When allocated, length is always a power of two.
+ */
+ transient DictionaryHashTable.Entry[] table = EMPTY_TABLE;
+
+ /**
+ * The number of key-value mappings contained in this map.
+ */
+ transient int size;
+
+ /**
+ * The next size value at which to resize (capacity * load factor).
+ */
+ int threshold;
+
+ /**
+ * The load factor for the hash table.
+ */
+ final float loadFactor;
+
+ private final ValueVector dictionary;
+
+ private final ArrowBufHasher hasher;
+
+ /**
+ * Constructs an empty map with the specified initial capacity and load factor.
+ */
+ public DictionaryHashTable(int initialCapacity, ValueVector dictionary, ArrowBufHasher hasher) {
+ if (initialCapacity < 0) {
+ throw new IllegalArgumentException("Illegal initial capacity: " +
+ initialCapacity);
+ }
+ if (initialCapacity > MAXIMUM_CAPACITY) {
+ initialCapacity = MAXIMUM_CAPACITY;
+ }
+ this.loadFactor = DEFAULT_LOAD_FACTOR;
+ this.threshold = initialCapacity;
+
+ this.dictionary = dictionary;
+
+ this.hasher = hasher;
+
+ // build hash table
+ for (int i = 0; i < this.dictionary.getValueCount(); i++) {
+ put(i);
+ }
+ }
+
+ public DictionaryHashTable(ValueVector dictionary, ArrowBufHasher hasher) {
+ this(DEFAULT_INITIAL_CAPACITY, dictionary, hasher);
+ }
+
+ public DictionaryHashTable(ValueVector dictionary) {
+ this(dictionary, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Compute the capacity with given threshold and create init table.
+ */
+ private void inflateTable(int threshold) {
+ int capacity = roundUpToPowerOf2(threshold);
+ this.threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1);
+ table = new DictionaryHashTable.Entry[capacity];
+ }
+
+ /**
+ * Computes the storage location in an array for the given hashCode.
+ */
+ static int indexFor(int h, int length) {
+ return h & (length - 1);
+ }
+
+ /**
+ * Returns a power of two size for the given size.
+ */
+ static final int roundUpToPowerOf2(int size) {
+ int n = size - 1;
+ n |= n >>> 1;
+ n |= n >>> 2;
+ n |= n >>> 4;
+ n |= n >>> 8;
+ n |= n >>> 16;
+ return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
+ }
+
+ /**
+ * get the corresponding dictionary index with the given index in vector which to encode.
+ * @param indexInArray index in vector.
+ * @return dictionary vector index or -1 if no value equals.
+ */
+ public int getIndex(int indexInArray, ValueVector toEncode) {
+ int hash = toEncode.hashCode(indexInArray, this.hasher);
+ int index = indexFor(hash, table.length);
+
+ RangeEqualsVisitor equalVisitor = new RangeEqualsVisitor(dictionary, toEncode, null);
+ Range range = new Range(0, 0, 1);
+
+ for (DictionaryHashTable.Entry e = table[index]; e != null ; e = e.next) {
+ if (e.hash == hash) {
+ int dictIndex = e.index;
+
+ range = range.setRightStart(indexInArray)
+ .setLeftStart(dictIndex);
+ if (equalVisitor.rangeEquals(range)) {
+ return dictIndex;
+ }
+ }
+ }
+ return NULL_VALUE;
+ }
+
+ /**
+ * put the index of dictionary vector to build hash table.
+ */
+ private void put(int indexInDictionary) {
+ if (table == EMPTY_TABLE) {
+ inflateTable(threshold);
+ }
+
+ int hash = dictionary.hashCode(indexInDictionary, this.hasher);
+ int i = indexFor(hash, table.length);
+ for (DictionaryHashTable.Entry e = table[i]; e != null; e = e.next) {
+ if (e.hash == hash && e.index == indexInDictionary) {
+ //already has this index, return
+ return;
+ }
+ }
+
+ addEntry(hash, indexInDictionary, i);
+ }
+
+ /**
+ * Create a new Entry at the specific position of table.
+ */
+ void createEntry(int hash, int index, int bucketIndex) {
+ DictionaryHashTable.Entry e = table[bucketIndex];
+ table[bucketIndex] = new DictionaryHashTable.Entry(hash, index, e);
+ size++;
+ }
+
+ /**
+ * Add Entry at the specified location of the table.
+ */
+ void addEntry(int hash, int index, int bucketIndex) {
+ if ((size >= threshold) && (null != table[bucketIndex])) {
+ resize(2 * table.length);
+ bucketIndex = indexFor(hash, table.length);
+ }
+
+ createEntry(hash, index, bucketIndex);
+ }
+
+ /**
+ * Resize table with given new capacity.
+ */
+ void resize(int newCapacity) {
+ DictionaryHashTable.Entry[] oldTable = table;
+ int oldCapacity = oldTable.length;
+ if (oldCapacity == MAXIMUM_CAPACITY) {
+ threshold = Integer.MAX_VALUE;
+ return;
+ }
+
+ DictionaryHashTable.Entry[] newTable = new DictionaryHashTable.Entry[newCapacity];
+ transfer(newTable);
+ table = newTable;
+ threshold = (int) Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
+ }
+
+ /**
+ * Transfer entries into new table from old table.
+ * @param newTable new table
+ */
+ void transfer(DictionaryHashTable.Entry[] newTable) {
+ int newCapacity = newTable.length;
+ for (DictionaryHashTable.Entry e : table) {
+ while (null != e) {
+ DictionaryHashTable.Entry next = e.next;
+ int i = indexFor(e.hash, newCapacity);
+ e.next = newTable[i];
+ newTable[i] = e;
+ e = next;
+ }
+ }
+ }
+
+ /**
+ * Returns the number of mappings in this Map.
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Removes all elements from this map, leaving it empty.
+ */
+ public void clear() {
+ size = 0;
+ for (int i = 0; i < table.length; i++) {
+ table[i] = null;
+ }
+ }
+
+ /**
+ * Class to keep dictionary index data within hash table.
+ */
+ static class Entry {
+ //dictionary index
+ int index;
+ DictionaryHashTable.Entry next;
+ int hash;
+
+ Entry(int hash, int index, DictionaryHashTable.Entry next) {
+ this.index = index;
+ this.hash = hash;
+ this.next = next;
+ }
+
+ public final int getIndex() {
+ return this.index;
+ }
+
+ @Override
+ public int hashCode() {
+ return hash;
+ }
+
+ public final boolean equals(Object o) {
+ if (!(o instanceof DictionaryHashTable.Entry)) {
+ return false;
+ }
+ DictionaryHashTable.Entry e = (DictionaryHashTable.Entry) o;
+ if (index == e.getIndex()) {
+ return true;
+ }
+ return false;
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java
new file mode 100644
index 000000000..21165c07d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A manager for association of dictionary IDs to their corresponding {@link Dictionary}.
+ */
+public interface DictionaryProvider {
+
+ /** Return the dictionary for the given ID. */
+ Dictionary lookup(long id);
+
+ /**
+ * Implementation of {@link DictionaryProvider} that is backed by a hash-map.
+ */
+ class MapDictionaryProvider implements DictionaryProvider {
+
+ private final Map<Long, Dictionary> map;
+
+ /**
+ * Constructs a new instance from the given dictionaries.
+ */
+ public MapDictionaryProvider(Dictionary... dictionaries) {
+ this.map = new HashMap<>();
+ for (Dictionary dictionary : dictionaries) {
+ put(dictionary);
+ }
+ }
+
+ public void put(Dictionary dictionary) {
+ map.put(dictionary.getEncoding().getId(), dictionary);
+ }
+
+ public final Set<Long> getDictionaryIds() {
+ return map.keySet();
+ }
+
+ @Override
+ public Dictionary lookup(long id) {
+ return map.get(id);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java
new file mode 100644
index 000000000..dd2bb26e3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import java.util.Collections;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.BaseListVector;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Sub fields encoder/decoder for Dictionary encoded {@link BaseListVector}.
+ */
+public class ListSubfieldEncoder {
+
+ private final DictionaryHashTable hashTable;
+ private final Dictionary dictionary;
+ private final BufferAllocator allocator;
+
+ public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator) {
+ this (dictionary, allocator, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Construct an instance.
+ */
+ public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator, ArrowBufHasher hasher) {
+ this.dictionary = dictionary;
+ this.allocator = allocator;
+ BaseListVector dictVector = (BaseListVector) dictionary.getVector();
+ hashTable = new DictionaryHashTable(getDataVector(dictVector), hasher);
+ }
+
+ private FieldVector getDataVector(BaseListVector vector) {
+ return vector.getChildrenFromFields().get(0);
+ }
+
+ private BaseListVector cloneVector(BaseListVector vector) {
+
+ final FieldType fieldType = vector.getField().getFieldType();
+ BaseListVector cloned = (BaseListVector) fieldType.createNewSingleVector(vector.getField().getName(),
+ allocator, /*schemaCallBack=*/null);
+
+ final ArrowFieldNode fieldNode = new ArrowFieldNode(vector.getValueCount(), vector.getNullCount());
+ cloned.loadFieldBuffers(fieldNode, vector.getFieldBuffers());
+
+ return cloned;
+ }
+
+ /**
+ * Dictionary encodes subfields for complex vector with a provided dictionary.
+ * The dictionary must contain all values in the sub fields vector.
+ * @param vector vector to encode
+ * @return dictionary encoded vector
+ */
+ public BaseListVector encodeListSubField(BaseListVector vector) {
+ final int valueCount = vector.getValueCount();
+
+ FieldType indexFieldType = new FieldType(vector.getField().isNullable(),
+ dictionary.getEncoding().getIndexType(), dictionary.getEncoding(), vector.getField().getMetadata());
+ Field valueField = new Field(vector.getField().getName(), indexFieldType, null);
+
+ // clone list vector and initialize data vector
+ BaseListVector encoded = cloneVector(vector);
+ encoded.initializeChildrenFromFields(Collections.singletonList(valueField));
+ BaseIntVector indices = (BaseIntVector) getDataVector(encoded);
+
+ ValueVector dataVector = getDataVector(vector);
+ for (int i = 0; i < valueCount; i++) {
+ if (!vector.isNull(i)) {
+ int start = vector.getElementStartIndex(i);
+ int end = vector.getElementEndIndex(i);
+
+ DictionaryEncoder.buildIndexVector(dataVector, indices, hashTable, start, end);
+ }
+ }
+
+ return encoded;
+ }
+
+ /**
+ * Decodes a dictionary subfields encoded vector using the provided dictionary.
+ * @param vector dictionary encoded vector, its data vector must be int type
+ * @return vector with values restored from dictionary
+ */
+ public BaseListVector decodeListSubField(BaseListVector vector) {
+
+ int valueCount = vector.getValueCount();
+ BaseListVector dictionaryVector = (BaseListVector) dictionary.getVector();
+ int dictionaryValueCount = getDataVector(dictionaryVector).getValueCount();
+
+ // clone list vector and initialize data vector
+ BaseListVector decoded = cloneVector(vector);
+ Field dataVectorField = getDataVector(dictionaryVector).getField();
+ decoded.initializeChildrenFromFields(Collections.singletonList(dataVectorField));
+
+ // get data vector
+ ValueVector dataVector = getDataVector(decoded);
+
+ TransferPair transfer = getDataVector(dictionaryVector).makeTransferPair(dataVector);
+ BaseIntVector indices = (BaseIntVector) getDataVector(vector);
+
+ for (int i = 0; i < valueCount; i++) {
+
+ if (!vector.isNull(i)) {
+ int start = vector.getElementStartIndex(i);
+ int end = vector.getElementEndIndex(i);
+
+ DictionaryEncoder.retrieveIndexVector(indices, transfer, dictionaryValueCount, start, end);
+ }
+ }
+ return decoded;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java
new file mode 100644
index 000000000..6542b298d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Sub fields encoder/decoder for Dictionary encoded {@link StructVector}.
+ * Notes that child vectors within struct vector can either be dictionary encodable or not.
+ */
+public class StructSubfieldEncoder {
+
+ private final BufferAllocator allocator;
+
+ private final DictionaryProvider.MapDictionaryProvider provider;
+ private final Map<Long, DictionaryHashTable> dictionaryIdToHashTable;
+
+ /**
+ * Construct an instance.
+ */
+ public StructSubfieldEncoder(BufferAllocator allocator, DictionaryProvider.MapDictionaryProvider provider) {
+ this (allocator, provider, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Construct an instance.
+ */
+ public StructSubfieldEncoder(
+ BufferAllocator allocator,
+ DictionaryProvider.MapDictionaryProvider provider,
+ ArrowBufHasher hasher) {
+
+ this.allocator = allocator;
+ this.provider = provider;
+
+ this.dictionaryIdToHashTable = new HashMap<>();
+
+ provider.getDictionaryIds().forEach(id ->
+ dictionaryIdToHashTable.put(id, new DictionaryHashTable(provider.lookup(id).getVector(), hasher)));
+ }
+
+ private FieldVector getChildVector(StructVector vector, int index) {
+ return vector.getChildrenFromFields().get(index);
+ }
+
+ private StructVector cloneVector(StructVector vector) {
+
+ final FieldType fieldType = vector.getField().getFieldType();
+ StructVector cloned = (StructVector) fieldType.createNewSingleVector(
+ vector.getField().getName(), allocator, /*schemaCallback=*/null);
+
+ final ArrowFieldNode fieldNode = new ArrowFieldNode(vector.getValueCount(), vector.getNullCount());
+ cloned.loadFieldBuffers(fieldNode, vector.getFieldBuffers());
+
+ return cloned;
+ }
+
+ /**
+ * Dictionary encodes subfields for complex vector with a provided dictionary.
+ * The dictionary must contain all values in the sub fields vector.
+ * @param vector vector to encode
+ * @param columnToDictionaryId the mappings between child vector index and dictionary id. A null dictionary
+ * id indicates the child vector is not encodable.
+ * @return dictionary encoded vector
+ */
+ public StructVector encode(StructVector vector, Map<Integer, Long> columnToDictionaryId) {
+ final int valueCount = vector.getValueCount();
+ final int childCount = vector.getChildrenFromFields().size();
+
+ List<Field> childrenFields = new ArrayList<>();
+
+ // initialize child fields.
+ for (int i = 0; i < childCount; i++) {
+ FieldVector childVector = getChildVector(vector, i);
+ Long dictionaryId = columnToDictionaryId.get(i);
+ // A null dictionaryId indicates the child vector shouldn't be encoded.
+ if (dictionaryId == null) {
+ childrenFields.add(childVector.getField());
+ } else {
+ Dictionary dictionary = provider.lookup(dictionaryId);
+ Preconditions.checkNotNull(dictionary, "Dictionary not found with id:" + dictionaryId);
+ FieldType indexFieldType = new FieldType(childVector.getField().isNullable(),
+ dictionary.getEncoding().getIndexType(), dictionary.getEncoding());
+ childrenFields.add(new Field(childVector.getField().getName(), indexFieldType, /*children=*/null));
+ }
+ }
+
+ // clone list vector and initialize data vector
+ StructVector encoded = cloneVector(vector);
+ encoded.initializeChildrenFromFields(childrenFields);
+ encoded.setValueCount(valueCount);
+
+ for (int index = 0; index < childCount; index++) {
+ FieldVector childVector = getChildVector(vector, index);
+ FieldVector encodedChildVector = getChildVector(encoded, index);
+ Long dictionaryId = columnToDictionaryId.get(index);
+ if (dictionaryId != null) {
+ BaseIntVector indices = (BaseIntVector) encodedChildVector;
+ DictionaryEncoder.buildIndexVector(childVector, indices, dictionaryIdToHashTable.get(dictionaryId),
+ 0, valueCount);
+ } else {
+ childVector.makeTransferPair(encodedChildVector).splitAndTransfer(0, valueCount);
+ }
+ }
+
+ return encoded;
+ }
+
+ /**
+ * Decodes a dictionary subfields encoded vector using the provided dictionary.
+ * @param vector dictionary encoded vector, its child vector must be int type
+ * @return vector with values restored from dictionary
+ */
+ public StructVector decode(StructVector vector) {
+
+ final int valueCount = vector.getValueCount();
+ final int childCount = vector.getChildrenFromFields().size();
+
+ // clone list vector and initialize child vectors
+ StructVector decoded = cloneVector(vector);
+ List<Field> childFields = new ArrayList<>();
+ for (int i = 0; i < childCount; i++) {
+ FieldVector childVector = getChildVector(vector, i);
+ Dictionary dictionary = getChildVectorDictionary(childVector);
+ // childVector is not encoded.
+ if (dictionary == null) {
+ childFields.add(childVector.getField());
+ } else {
+ childFields.add(dictionary.getVector().getField());
+ }
+ }
+ decoded.initializeChildrenFromFields(childFields);
+ decoded.setValueCount(valueCount);
+
+ for (int index = 0; index < childCount; index++) {
+ // get child vector
+ FieldVector childVector = getChildVector(vector, index);
+ FieldVector decodedChildVector = getChildVector(decoded, index);
+ Dictionary dictionary = getChildVectorDictionary(childVector);
+ if (dictionary == null) {
+ childVector.makeTransferPair(decodedChildVector).splitAndTransfer(0, valueCount);
+ } else {
+ TransferPair transfer = dictionary.getVector().makeTransferPair(decodedChildVector);
+ BaseIntVector indices = (BaseIntVector) childVector;
+
+ DictionaryEncoder.retrieveIndexVector(indices, transfer, valueCount, 0, valueCount);
+ }
+ }
+
+ return decoded;
+ }
+
+ /**
+ * Get the child vector dictionary, return null if not dictionary encoded.
+ */
+ private Dictionary getChildVectorDictionary(FieldVector childVector) {
+ DictionaryEncoding dictionaryEncoding = childVector.getField().getDictionary();
+ if (dictionaryEncoding != null) {
+ Dictionary dictionary = provider.lookup(dictionaryEncoding.getId());
+ Preconditions.checkNotNull(dictionary, "Dictionary not found with id:" + dictionary);
+ return dictionary;
+ }
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java
new file mode 100644
index 000000000..b4cb4882f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+import org.apache.arrow.vector.complex.reader.FieldReader;
+
+/**
+ * Represents a single value of a complex type (e.g. Union, Struct).
+ */
+public class ComplexHolder implements ValueHolder {
+ public FieldReader reader;
+ public int isSet;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java
new file mode 100644
index 000000000..c3052711e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+
+/**
+ * {@link ValueHolder} for Dense Union Vectors.
+ */
+public class DenseUnionHolder implements ValueHolder {
+ public FieldReader reader;
+ public int isSet;
+ public byte typeId;
+
+ public Types.MinorType getMinorType() {
+ return reader.getMinorType();
+ }
+
+ public boolean isSet() {
+ return isSet == 1;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java
new file mode 100644
index 000000000..fc17704f0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+/**
+ * {@link ValueHolder} for a nested {@link org.apache.arrow.vector.complex.ListVector}.
+ */
+public final class RepeatedListHolder implements ValueHolder {
+ public int start;
+ public int end;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java
new file mode 100644
index 000000000..32c590a8a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+/**
+ * {@link ValueHolder} for a list of structs.
+ */
+public final class RepeatedStructHolder implements ValueHolder {
+ public int start;
+ public int end;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java
new file mode 100644
index 000000000..e67a0e941
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/**
+ * {@link ValueHolder} for Union Vectors.
+ */
+public class UnionHolder implements ValueHolder {
+ public FieldReader reader;
+ public int isSet;
+
+ public MinorType getMinorType() {
+ return reader.getMinorType();
+ }
+
+ public boolean isSet() {
+ return isSet == 1;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java
new file mode 100644
index 000000000..a809e6bb8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+/**
+ * Wrapper object for an individual value in Arrow.
+ *
+ * <p>ValueHolders are designed to be mutable wrapper objects for defining clean
+ * APIs that access data in Arrow. For performance, object creation is avoided
+ * at all costs throughout execution. For this reason, ValueHolders are
+ * disallowed from implementing any methods, this allows for them to be
+ * replaced by their java primitive inner members during optimization of
+ * run-time generated code.
+ */
+public interface ValueHolder {
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java
new file mode 100644
index 000000000..f4e9e0db1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.SeekableByteChannel;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.flatbuf.Footer;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowFooter;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An implementation of {@link ArrowReader} that reads the standard arrow binary
+ * file format.
+ */
+public class ArrowFileReader extends ArrowReader {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(ArrowFileReader.class);
+
+ private SeekableReadChannel in;
+ private ArrowFooter footer;
+ private int currentDictionaryBatch = 0;
+ private int currentRecordBatch = 0;
+
+ public ArrowFileReader(
+ SeekableReadChannel in, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ super(allocator, compressionFactory);
+ this.in = in;
+ }
+
+ public ArrowFileReader(
+ SeekableByteChannel in, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ this(new SeekableReadChannel(in), allocator, compressionFactory);
+ }
+
+ public ArrowFileReader(SeekableReadChannel in, BufferAllocator allocator) {
+ this(in, allocator, NoCompressionCodec.Factory.INSTANCE);
+ }
+
+ public ArrowFileReader(SeekableByteChannel in, BufferAllocator allocator) {
+ this(new SeekableReadChannel(in), allocator);
+ }
+
+ @Override
+ public long bytesRead() {
+ return in.bytesRead();
+ }
+
+ @Override
+ protected void closeReadSource() throws IOException {
+ in.close();
+ }
+
+ @Override
+ protected Schema readSchema() throws IOException {
+ if (footer == null) {
+ if (in.size() <= (ArrowMagic.MAGIC_LENGTH * 2 + 4)) {
+ throw new InvalidArrowFileException("file too small: " + in.size());
+ }
+ ByteBuffer buffer = ByteBuffer.allocate(4 + ArrowMagic.MAGIC_LENGTH);
+ long footerLengthOffset = in.size() - buffer.remaining();
+ in.setPosition(footerLengthOffset);
+ in.readFully(buffer);
+ buffer.flip();
+ byte[] array = buffer.array();
+ if (!ArrowMagic.validateMagic(Arrays.copyOfRange(array, 4, array.length))) {
+ throw new InvalidArrowFileException("missing Magic number " + Arrays.toString(buffer.array()));
+ }
+ int footerLength = MessageSerializer.bytesToInt(array);
+ if (footerLength <= 0 || footerLength + ArrowMagic.MAGIC_LENGTH * 2 + 4 > in.size()) {
+ throw new InvalidArrowFileException("invalid footer length: " + footerLength);
+ }
+ long footerOffset = footerLengthOffset - footerLength;
+ LOGGER.debug("Footer starts at {}, length: {}", footerOffset, footerLength);
+ ByteBuffer footerBuffer = ByteBuffer.allocate(footerLength);
+ in.setPosition(footerOffset);
+ in.readFully(footerBuffer);
+ footerBuffer.flip();
+ Footer footerFB = Footer.getRootAsFooter(footerBuffer);
+ this.footer = new ArrowFooter(footerFB);
+ }
+ MetadataV4UnionChecker.checkRead(footer.getSchema(), footer.getMetadataVersion());
+ return footer.getSchema();
+ }
+
+ @Override
+ public void initialize() throws IOException {
+ super.initialize();
+
+ // empty stream, has no dictionaries in IPC.
+ if (footer.getRecordBatches().size() == 0) {
+ return;
+ }
+ // Read and load all dictionaries from schema
+ for (int i = 0; i < dictionaries.size(); i++) {
+ ArrowDictionaryBatch dictionaryBatch = readDictionary();
+ loadDictionary(dictionaryBatch);
+ }
+ }
+
+ /**
+ * Get custom metadata.
+ */
+ public Map<String, String> getMetaData() {
+ if (footer != null) {
+ return footer.getMetaData();
+ }
+ return new HashMap<>();
+ }
+
+ /**
+ * Read a dictionary batch from the source, will be invoked after the schema has been read and
+ * called N times, where N is the number of dictionaries indicated by the schema Fields.
+ *
+ * @return the read ArrowDictionaryBatch
+ * @throws IOException on error
+ */
+ public ArrowDictionaryBatch readDictionary() throws IOException {
+ if (currentDictionaryBatch >= footer.getDictionaries().size()) {
+ throw new IOException("Requested more dictionaries than defined in footer: " + currentDictionaryBatch);
+ }
+ ArrowBlock block = footer.getDictionaries().get(currentDictionaryBatch++);
+ return readDictionaryBatch(in, block, allocator);
+ }
+
+ /** Returns true if a batch was read, false if no more batches. */
+ @Override
+ public boolean loadNextBatch() throws IOException {
+ prepareLoadNextBatch();
+
+ if (currentRecordBatch < footer.getRecordBatches().size()) {
+ ArrowBlock block = footer.getRecordBatches().get(currentRecordBatch++);
+ ArrowRecordBatch batch = readRecordBatch(in, block, allocator);
+ loadRecordBatch(batch);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+
+ public List<ArrowBlock> getDictionaryBlocks() throws IOException {
+ ensureInitialized();
+ return footer.getDictionaries();
+ }
+
+ /**
+ * Returns the {@link ArrowBlock} metadata from the file.
+ */
+ public List<ArrowBlock> getRecordBlocks() throws IOException {
+ ensureInitialized();
+ return footer.getRecordBatches();
+ }
+
+ /**
+ * Loads record batch for the given block.
+ */
+ public boolean loadRecordBatch(ArrowBlock block) throws IOException {
+ ensureInitialized();
+ int blockIndex = footer.getRecordBatches().indexOf(block);
+ if (blockIndex == -1) {
+ throw new IllegalArgumentException("Arrow block does not exist in record batches: " + block);
+ }
+ currentRecordBatch = blockIndex;
+ return loadNextBatch();
+ }
+
+ @VisibleForTesting
+ ArrowFooter getFooter() {
+ return footer;
+ }
+
+ private ArrowDictionaryBatch readDictionaryBatch(SeekableReadChannel in,
+ ArrowBlock block,
+ BufferAllocator allocator) throws IOException {
+ LOGGER.debug("DictionaryRecordBatch at {}, metadata: {}, body: {}",
+ block.getOffset(), block.getMetadataLength(), block.getBodyLength());
+ in.setPosition(block.getOffset());
+ ArrowDictionaryBatch batch = MessageSerializer.deserializeDictionaryBatch(in, block, allocator);
+ if (batch == null) {
+ throw new IOException("Invalid file. No batch at offset: " + block.getOffset());
+ }
+ return batch;
+ }
+
+ private ArrowRecordBatch readRecordBatch(SeekableReadChannel in,
+ ArrowBlock block,
+ BufferAllocator allocator) throws IOException {
+ LOGGER.debug("RecordBatch at {}, metadata: {}, body: {}",
+ block.getOffset(), block.getMetadataLength(),
+ block.getBodyLength());
+ in.setPosition(block.getOffset());
+ ArrowRecordBatch batch = MessageSerializer.deserializeRecordBatch(in, block, allocator);
+ if (batch == null) {
+ throw new IOException("Invalid file. No batch at offset: " + block.getOffset());
+ }
+ return batch;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java
new file mode 100644
index 000000000..55cd26285
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.channels.WritableByteChannel;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowFooter;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link ArrowWriter} that writes out a Arrow files (https://arrow.apache.org/docs/format/IPC.html#file-format).
+ */
+public class ArrowFileWriter extends ArrowWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(ArrowFileWriter.class);
+
+ // All ArrowBlocks written are saved in these lists to be passed to ArrowFooter in endInternal.
+ private final List<ArrowBlock> dictionaryBlocks = new ArrayList<>();
+ private final List<ArrowBlock> recordBlocks = new ArrayList<>();
+
+ private Map<String, String> metaData;
+
+ public ArrowFileWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) {
+ super(root, provider, out);
+ }
+
+ public ArrowFileWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out,
+ Map<String, String> metaData) {
+ super(root, provider, out);
+ this.metaData = metaData;
+ }
+
+ public ArrowFileWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out,
+ IpcOption option) {
+ super(root, provider, out, option);
+ }
+
+ public ArrowFileWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out,
+ Map<String, String> metaData, IpcOption option) {
+ super(root, provider, out, option);
+ this.metaData = metaData;
+ }
+
+ @Override
+ protected void startInternal(WriteChannel out) throws IOException {
+ ArrowMagic.writeMagic(out, true);
+ }
+
+ @Override
+ protected ArrowBlock writeDictionaryBatch(ArrowDictionaryBatch batch) throws IOException {
+ ArrowBlock block = super.writeDictionaryBatch(batch);
+ dictionaryBlocks.add(block);
+ return block;
+ }
+
+ @Override
+ protected ArrowBlock writeRecordBatch(ArrowRecordBatch batch) throws IOException {
+ ArrowBlock block = super.writeRecordBatch(batch);
+ recordBlocks.add(block);
+ return block;
+ }
+
+ @Override
+ protected void endInternal(WriteChannel out) throws IOException {
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(MessageSerializer.IPC_CONTINUATION_TOKEN);
+ }
+ out.writeIntLittleEndian(0);
+
+ long footerStart = out.getCurrentPosition();
+ out.write(new ArrowFooter(schema, dictionaryBlocks, recordBlocks, metaData, option.metadataVersion), false);
+ int footerLength = (int) (out.getCurrentPosition() - footerStart);
+ if (footerLength <= 0) {
+ throw new InvalidArrowFileException("invalid footer");
+ }
+ out.writeIntLittleEndian(footerLength);
+ LOGGER.debug("Footer starts at {}, length: {}", footerStart, footerLength);
+ ArrowMagic.writeMagic(out, false);
+ LOGGER.debug("magic written, now at {}", out.getCurrentPosition());
+ }
+
+ @VisibleForTesting
+ public List<ArrowBlock> getRecordBlocks() {
+ return recordBlocks;
+ }
+
+ @VisibleForTesting
+ public List<ArrowBlock> getDictionaryBlocks() {
+ return dictionaryBlocks;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java
new file mode 100644
index 000000000..9c399669a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+/**
+ * Magic header/footer helpers for {@link ArrowFileWriter} and {@link ArrowFileReader} formatted files.
+ */
+class ArrowMagic {
+ private ArrowMagic(){}
+
+ private static final byte[] MAGIC = "ARROW1".getBytes(StandardCharsets.UTF_8);
+
+ public static final int MAGIC_LENGTH = MAGIC.length;
+
+ public static void writeMagic(WriteChannel out, boolean align) throws IOException {
+ out.write(MAGIC);
+ if (align) {
+ out.align();
+ }
+ }
+
+ public static boolean validateMagic(byte[] array) {
+ return Arrays.equals(MAGIC, array);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java
new file mode 100644
index 000000000..9d940deec
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.arrow.vector.util.VectorBatchAppender;
+
+/**
+ * Abstract class to read Schema and ArrowRecordBatches.
+ *
+ */
+public abstract class ArrowReader implements DictionaryProvider, AutoCloseable {
+
+ protected final BufferAllocator allocator;
+ private VectorLoader loader;
+ private VectorSchemaRoot root;
+ protected Map<Long, Dictionary> dictionaries;
+ private boolean initialized = false;
+
+ private final CompressionCodec.Factory compressionFactory;
+
+ protected ArrowReader(BufferAllocator allocator) {
+ this(allocator, NoCompressionCodec.Factory.INSTANCE);
+ }
+
+ protected ArrowReader(BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ this.allocator = allocator;
+ this.compressionFactory = compressionFactory;
+ }
+
+ /**
+ * Returns the vector schema root. This will be loaded with new values on every call to loadNextBatch.
+ *
+ * @return the vector schema root
+ * @throws IOException if reading of schema fails
+ */
+ public VectorSchemaRoot getVectorSchemaRoot() throws IOException {
+ ensureInitialized();
+ return root;
+ }
+
+ /**
+ * Returns any dictionaries that were loaded along with ArrowRecordBatches.
+ *
+ * @return Map of dictionaries to dictionary id, empty if no dictionaries loaded
+ * @throws IOException if reading of schema fails
+ */
+ public Map<Long, Dictionary> getDictionaryVectors() throws IOException {
+ ensureInitialized();
+ return dictionaries;
+ }
+
+ /**
+ * Lookup a dictionary that has been loaded using the dictionary id.
+ *
+ * @param id Unique identifier for a dictionary
+ * @return the requested dictionary or null if not found
+ */
+ @Override
+ public Dictionary lookup(long id) {
+ if (!initialized) {
+ throw new IllegalStateException("Unable to lookup until reader has been initialized");
+ }
+
+ return dictionaries.get(id);
+ }
+
+ /**
+ * Load the next ArrowRecordBatch to the vector schema root if available.
+ *
+ * @return true if a batch was read, false on EOS
+ * @throws IOException on error
+ */
+ public abstract boolean loadNextBatch() throws IOException;
+
+ /**
+ * Return the number of bytes read from the ReadChannel.
+ *
+ * @return number of bytes read
+ */
+ public abstract long bytesRead();
+
+ /**
+ * Close resources, including vector schema root and dictionary vectors, and the
+ * underlying read source.
+ *
+ * @throws IOException on error
+ */
+ @Override
+ public void close() throws IOException {
+ close(true);
+ }
+
+ /**
+ * Close resources, including vector schema root and dictionary vectors. If the flag
+ * closeReadChannel is true then close the underlying read source, otherwise leave it open.
+ *
+ * @param closeReadSource Flag to control if closing the underlying read source
+ * @throws IOException on error
+ */
+ public void close(boolean closeReadSource) throws IOException {
+ if (initialized) {
+ root.close();
+ for (Dictionary dictionary : dictionaries.values()) {
+ dictionary.getVector().close();
+ }
+ }
+
+ if (closeReadSource) {
+ closeReadSource();
+ }
+ }
+
+ /**
+ * Close the underlying read source.
+ *
+ * @throws IOException on error
+ */
+ protected abstract void closeReadSource() throws IOException;
+
+ /**
+ * Read the Schema from the source, will be invoked at the beginning the initialization.
+ *
+ * @return the read Schema
+ * @throws IOException on error
+ */
+ protected abstract Schema readSchema() throws IOException;
+
+ /**
+ * Initialize if not done previously.
+ *
+ * @throws IOException on error
+ */
+ protected void ensureInitialized() throws IOException {
+ if (!initialized) {
+ initialize();
+ initialized = true;
+ }
+ }
+
+ /**
+ * Reads the schema and initializes the vectors.
+ */
+ protected void initialize() throws IOException {
+ Schema originalSchema = readSchema();
+ List<Field> fields = new ArrayList<>(originalSchema.getFields().size());
+ List<FieldVector> vectors = new ArrayList<>(originalSchema.getFields().size());
+ Map<Long, Dictionary> dictionaries = new HashMap<>();
+
+ // Convert fields with dictionaries to have the index type
+ for (Field field : originalSchema.getFields()) {
+ Field updated = DictionaryUtility.toMemoryFormat(field, allocator, dictionaries);
+ fields.add(updated);
+ vectors.add(updated.createVector(allocator));
+ }
+ Schema schema = new Schema(fields, originalSchema.getCustomMetadata());
+
+ this.root = new VectorSchemaRoot(schema, vectors, 0);
+ this.loader = new VectorLoader(root, compressionFactory);
+ this.dictionaries = Collections.unmodifiableMap(dictionaries);
+ }
+
+ /**
+ * Ensure the reader has been initialized and reset the VectorSchemaRoot row count to 0.
+ *
+ * @throws IOException on error
+ */
+ protected void prepareLoadNextBatch() throws IOException {
+ ensureInitialized();
+ root.setRowCount(0);
+ }
+
+ /**
+ * Load an ArrowRecordBatch to the readers VectorSchemaRoot.
+ *
+ * @param batch the record batch to load
+ */
+ protected void loadRecordBatch(ArrowRecordBatch batch) {
+ try {
+ loader.load(batch);
+ } finally {
+ batch.close();
+ }
+ }
+
+ /**
+ * Load an ArrowDictionaryBatch to the readers dictionary vectors.
+ *
+ * @param dictionaryBatch dictionary batch to load
+ */
+ protected void loadDictionary(ArrowDictionaryBatch dictionaryBatch) {
+ long id = dictionaryBatch.getDictionaryId();
+ Dictionary dictionary = dictionaries.get(id);
+ if (dictionary == null) {
+ throw new IllegalArgumentException("Dictionary ID " + id + " not defined in schema");
+ }
+ FieldVector vector = dictionary.getVector();
+ // if is deltaVector, concat it with non-delta vector with the same ID.
+ if (dictionaryBatch.isDelta()) {
+ try (FieldVector deltaVector = vector.getField().createVector(allocator)) {
+ load(dictionaryBatch, deltaVector);
+ VectorBatchAppender.batchAppend(vector, deltaVector);
+ }
+ return;
+ }
+
+ load(dictionaryBatch, vector);
+ }
+
+ private void load(ArrowDictionaryBatch dictionaryBatch, FieldVector vector) {
+ VectorSchemaRoot root = new VectorSchemaRoot(
+ Collections.singletonList(vector.getField()),
+ Collections.singletonList(vector), 0);
+ VectorLoader loader = new VectorLoader(root);
+ try {
+ loader.load(dictionaryBatch.getDictionary());
+ } finally {
+ dictionaryBatch.close();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java
new file mode 100644
index 000000000..a0096aaf3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
+
+import org.apache.arrow.flatbuf.MessageHeader;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.MessageChannelReader;
+import org.apache.arrow.vector.ipc.message.MessageResult;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+
+/**
+ * This class reads from an input stream and produces ArrowRecordBatches.
+ */
+public class ArrowStreamReader extends ArrowReader {
+
+ private MessageChannelReader messageReader;
+
+ private int loadedDictionaryCount;
+
+ /**
+ * Constructs a streaming reader using a MessageChannelReader. Non-blocking.
+ *
+ * @param messageReader reader used to get messages from a ReadChannel
+ * @param allocator to allocate new buffers
+ * @param compressionFactory the factory to create compression codec.
+ */
+ public ArrowStreamReader(
+ MessageChannelReader messageReader, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ super(allocator, compressionFactory);
+ this.messageReader = messageReader;
+ }
+
+ /**
+ * Constructs a streaming reader using a MessageChannelReader. Non-blocking.
+ *
+ * @param messageReader reader used to get messages from a ReadChannel
+ * @param allocator to allocate new buffers
+ */
+ public ArrowStreamReader(MessageChannelReader messageReader, BufferAllocator allocator) {
+ this(messageReader, allocator, NoCompressionCodec.Factory.INSTANCE);
+ }
+
+ /**
+ * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking.
+ *
+ * @param in ReadableByteChannel to read messages from
+ * @param allocator to allocate new buffers
+ * @param compressionFactory the factory to create compression codec.
+ */
+ public ArrowStreamReader(
+ ReadableByteChannel in, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ this(new MessageChannelReader(new ReadChannel(in), allocator), allocator, compressionFactory);
+ }
+
+ /**
+ * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking.
+ *
+ * @param in ReadableByteChannel to read messages from
+ * @param allocator to allocate new buffers
+ */
+ public ArrowStreamReader(ReadableByteChannel in, BufferAllocator allocator) {
+ this(new MessageChannelReader(new ReadChannel(in), allocator), allocator);
+ }
+
+ /**
+ * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking.
+ *
+ * @param in InputStream to read messages from
+ * @param allocator to allocate new buffers
+ * @param compressionFactory the factory to create compression codec.
+ */
+ public ArrowStreamReader(
+ InputStream in, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ this(Channels.newChannel(in), allocator, compressionFactory);
+ }
+
+ /**
+ * Constructs a streaming reader from an InputStream. Non-blocking.
+ *
+ * @param in InputStream to read messages from
+ * @param allocator to allocate new buffers
+ */
+ public ArrowStreamReader(InputStream in, BufferAllocator allocator) {
+ this(Channels.newChannel(in), allocator);
+ }
+
+ /**
+ * Get the number of bytes read from the stream since constructing the reader.
+ *
+ * @return number of bytes
+ */
+ @Override
+ public long bytesRead() {
+ return messageReader.bytesRead();
+ }
+
+ /**
+ * Closes the underlying read source.
+ *
+ * @throws IOException on error
+ */
+ @Override
+ protected void closeReadSource() throws IOException {
+ messageReader.close();
+ }
+
+ /**
+ * Load the next ArrowRecordBatch to the vector schema root if available.
+ *
+ * @return true if a batch was read, false on EOS
+ * @throws IOException on error
+ */
+ public boolean loadNextBatch() throws IOException {
+ prepareLoadNextBatch();
+ MessageResult result = messageReader.readNext();
+
+ // Reached EOS
+ if (result == null) {
+ return false;
+ }
+
+ if (result.getMessage().headerType() == MessageHeader.RecordBatch) {
+ ArrowBuf bodyBuffer = result.getBodyBuffer();
+
+ // For zero-length batches, need an empty buffer to deserialize the batch
+ if (bodyBuffer == null) {
+ bodyBuffer = allocator.getEmpty();
+ }
+
+ ArrowRecordBatch batch = MessageSerializer.deserializeRecordBatch(result.getMessage(), bodyBuffer);
+ loadRecordBatch(batch);
+ checkDictionaries();
+ return true;
+ } else if (result.getMessage().headerType() == MessageHeader.DictionaryBatch) {
+ // if it's dictionary message, read dictionary message out and continue to read unless get a batch or eos.
+ ArrowDictionaryBatch dictionaryBatch = readDictionary(result);
+ loadDictionary(dictionaryBatch);
+ loadedDictionaryCount++;
+ return loadNextBatch();
+ } else {
+ throw new IOException("Expected RecordBatch or DictionaryBatch but header was " +
+ result.getMessage().headerType());
+ }
+ }
+
+ /**
+ * When read a record batch, check whether its dictionaries are available.
+ */
+ private void checkDictionaries() throws IOException {
+ // if all dictionaries are loaded, return.
+ if (loadedDictionaryCount == dictionaries.size()) {
+ return;
+ }
+ for (FieldVector vector : getVectorSchemaRoot().getFieldVectors()) {
+ DictionaryEncoding encoding = vector.getField().getDictionary();
+ if (encoding != null) {
+ // if the dictionaries it needs is not available and the vector is not all null, something was wrong.
+ if (!dictionaries.containsKey(encoding.getId()) && vector.getNullCount() < vector.getValueCount()) {
+ throw new IOException("The dictionary was not available, id was:" + encoding.getId());
+ }
+ }
+ }
+ }
+
+ /**
+ * Reads the schema message from the beginning of the stream.
+ *
+ * @return the deserialized arrow schema
+ */
+ @Override
+ protected Schema readSchema() throws IOException {
+ MessageResult result = messageReader.readNext();
+
+ if (result == null) {
+ throw new IOException("Unexpected end of input. Missing schema.");
+ }
+
+ if (result.getMessage().headerType() != MessageHeader.Schema) {
+ throw new IOException("Expected schema but header was " + result.getMessage().headerType());
+ }
+
+ final Schema schema = MessageSerializer.deserializeSchema(result.getMessage());
+ MetadataV4UnionChecker.checkRead(schema, MetadataVersion.fromFlatbufID(result.getMessage().version()));
+ return schema;
+ }
+
+
+ private ArrowDictionaryBatch readDictionary(MessageResult result) throws IOException {
+
+ ArrowBuf bodyBuffer = result.getBodyBuffer();
+
+ // For zero-length batches, need an empty buffer to deserialize the batch
+ if (bodyBuffer == null) {
+ bodyBuffer = allocator.getEmpty();
+ }
+
+ return MessageSerializer.deserializeDictionaryBatch(result.getMessage(), bodyBuffer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java
new file mode 100644
index 000000000..deb98580f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.channels.Channels;
+import java.nio.channels.WritableByteChannel;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+
+/**
+ * Writer for the Arrow stream format to send ArrowRecordBatches over a WriteChannel.
+ */
+public class ArrowStreamWriter extends ArrowWriter {
+
+ /**
+ * Construct an ArrowStreamWriter with an optional DictionaryProvider for the OutputStream.
+ *
+ * @param root Existing VectorSchemaRoot with vectors to be written.
+ * @param provider DictionaryProvider for any vectors that are dictionary encoded.
+ * (Optional, can be null)
+ * @param out OutputStream for writing.
+ */
+ public ArrowStreamWriter(VectorSchemaRoot root, DictionaryProvider provider, OutputStream out) {
+ this(root, provider, Channels.newChannel(out));
+ }
+
+ /**
+ * Construct an ArrowStreamWriter with an optional DictionaryProvider for the WritableByteChannel.
+ */
+ public ArrowStreamWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) {
+ this(root, provider, out, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Construct an ArrowStreamWriter with an optional DictionaryProvider for the WritableByteChannel.
+ *
+ * @param root Existing VectorSchemaRoot with vectors to be written.
+ * @param provider DictionaryProvider for any vectors that are dictionary encoded.
+ * (Optional, can be null)
+ * @param option IPC write options
+ * @param out WritableByteChannel for writing.
+ */
+ public ArrowStreamWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out,
+ IpcOption option) {
+ super(root, provider, out, option);
+ }
+
+ /**
+ * Write an EOS identifier to the WriteChannel.
+ *
+ * @param out Open WriteChannel with an active Arrow stream.
+ * @param option IPC write option
+ * @throws IOException on error
+ */
+ public static void writeEndOfStream(WriteChannel out, IpcOption option) throws IOException {
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(MessageSerializer.IPC_CONTINUATION_TOKEN);
+ }
+ out.writeIntLittleEndian(0);
+ }
+
+ @Override
+ protected void endInternal(WriteChannel out) throws IOException {
+ writeEndOfStream(out, option);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java
new file mode 100644
index 000000000..7bc9a306f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.channels.WritableByteChannel;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Abstract base class for implementing Arrow writers for IPC over a WriteChannel.
+ */
+public abstract class ArrowWriter implements AutoCloseable {
+
+ protected static final Logger LOGGER = LoggerFactory.getLogger(ArrowWriter.class);
+
+ // schema with fields in message format, not memory format
+ protected final Schema schema;
+ protected final WriteChannel out;
+
+ private final VectorUnloader unloader;
+ private final List<ArrowDictionaryBatch> dictionaries;
+
+ private boolean started = false;
+ private boolean ended = false;
+
+ private boolean dictWritten = false;
+
+ protected IpcOption option;
+
+ protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) {
+ this (root, provider, out, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Note: fields are not closed when the writer is closed.
+ *
+ * @param root the vectors to write to the output
+ * @param provider where to find the dictionaries
+ * @param out the output where to write
+ * @param option IPC write options
+ */
+ protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out, IpcOption option) {
+ this.unloader = new VectorUnloader(root);
+ this.out = new WriteChannel(out);
+ this.option = option;
+
+ List<Field> fields = new ArrayList<>(root.getSchema().getFields().size());
+ Set<Long> dictionaryIdsUsed = new HashSet<>();
+
+ MetadataV4UnionChecker.checkForUnion(root.getSchema().getFields().iterator(), option.metadataVersion);
+ // Convert fields with dictionaries to have dictionary type
+ for (Field field : root.getSchema().getFields()) {
+ fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed));
+ }
+
+ // Create a record batch for each dictionary
+ this.dictionaries = new ArrayList<>(dictionaryIdsUsed.size());
+ for (long id : dictionaryIdsUsed) {
+ Dictionary dictionary = provider.lookup(id);
+ FieldVector vector = dictionary.getVector();
+ int count = vector.getValueCount();
+ VectorSchemaRoot dictRoot = new VectorSchemaRoot(
+ Collections.singletonList(vector.getField()),
+ Collections.singletonList(vector),
+ count);
+ VectorUnloader unloader = new VectorUnloader(dictRoot);
+ ArrowRecordBatch batch = unloader.getRecordBatch();
+ this.dictionaries.add(new ArrowDictionaryBatch(id, batch));
+ }
+
+ this.schema = new Schema(fields, root.getSchema().getCustomMetadata());
+ }
+
+ public void start() throws IOException {
+ ensureStarted();
+ }
+
+ /**
+ * Writes the record batch currently loaded in this instance's VectorSchemaRoot.
+ */
+ public void writeBatch() throws IOException {
+ ensureStarted();
+ ensureDictionariesWritten();
+ try (ArrowRecordBatch batch = unloader.getRecordBatch()) {
+ writeRecordBatch(batch);
+ }
+ }
+
+ protected ArrowBlock writeDictionaryBatch(ArrowDictionaryBatch batch) throws IOException {
+ ArrowBlock block = MessageSerializer.serialize(out, batch, option);
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("DictionaryRecordBatch at {}, metadata: {}, body: {}",
+ block.getOffset(), block.getMetadataLength(), block.getBodyLength());
+ }
+ return block;
+ }
+
+ protected ArrowBlock writeRecordBatch(ArrowRecordBatch batch) throws IOException {
+ ArrowBlock block = MessageSerializer.serialize(out, batch, option);
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("RecordBatch at {}, metadata: {}, body: {}",
+ block.getOffset(), block.getMetadataLength(), block.getBodyLength());
+ }
+ return block;
+ }
+
+ public void end() throws IOException {
+ ensureStarted();
+ ensureEnded();
+ }
+
+ public long bytesWritten() {
+ return out.getCurrentPosition();
+ }
+
+ private void ensureStarted() throws IOException {
+ if (!started) {
+ started = true;
+ startInternal(out);
+ // write the schema - for file formats this is duplicated in the footer, but matches
+ // the streaming format
+ MessageSerializer.serialize(out, schema, option);
+ }
+ }
+
+ /**
+ * Write dictionaries after schema and before recordBatches, dictionaries won't be
+ * written if empty stream (only has schema data in IPC).
+ */
+ private void ensureDictionariesWritten() throws IOException {
+ if (!dictWritten) {
+ dictWritten = true;
+ // write out any dictionaries
+ try {
+ for (ArrowDictionaryBatch batch : dictionaries) {
+ writeDictionaryBatch(batch);
+ }
+ } finally {
+ try {
+ AutoCloseables.close(dictionaries);
+ } catch (Exception e) {
+ throw new RuntimeException("Error occurred while closing dictionaries.", e);
+ }
+ }
+ }
+ }
+
+ private void ensureEnded() throws IOException {
+ if (!ended) {
+ ended = true;
+ endInternal(out);
+ }
+ }
+
+ protected void startInternal(WriteChannel out) throws IOException {
+ }
+
+ protected void endInternal(WriteChannel out) throws IOException {
+ }
+
+ @Override
+ public void close() {
+ try {
+ end();
+ out.close();
+ if (!dictWritten) {
+ AutoCloseables.close(dictionaries);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java
new file mode 100644
index 000000000..e234058e6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+/**
+ * Exception indicating a problem with an Arrow File (https://arrow.apache.org/docs/format/IPC.html#file-format).
+ */
+public class InvalidArrowFileException extends RuntimeException {
+ private static final long serialVersionUID = 1L;
+
+ public InvalidArrowFileException(String message) {
+ super(message);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
new file mode 100644
index 000000000..d093e840a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
@@ -0,0 +1,806 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static com.fasterxml.jackson.core.JsonToken.END_ARRAY;
+import static com.fasterxml.jackson.core.JsonToken.END_OBJECT;
+import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
+import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.arrow.vector.BufferLayout.BufferType.DATA;
+import static org.apache.arrow.vector.BufferLayout.BufferType.OFFSET;
+import static org.apache.arrow.vector.BufferLayout.BufferType.TYPE;
+import static org.apache.arrow.vector.BufferLayout.BufferType.VALIDITY;
+
+import java.io.File;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferLayout.BufferType;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.TypeLayout;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.binary.Hex;
+
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.databind.MapperFeature;
+import com.fasterxml.jackson.databind.MappingJsonFactory;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * A reader for JSON files that translates them into vectors. This reader is used for integration tests.
+ *
+ * <p>This class uses a streaming parser API, method naming tends to reflect this implementation
+ * detail.
+ */
+public class JsonFileReader implements AutoCloseable, DictionaryProvider {
+ private final JsonParser parser;
+ private final BufferAllocator allocator;
+ private Schema schema;
+ private Map<Long, Dictionary> dictionaries;
+ private Boolean started = false;
+
+ /**
+ * Constructs a new instance.
+ * @param inputFile The file to read.
+ * @param allocator The allocator to use for allocating buffers.
+ */
+ public JsonFileReader(File inputFile, BufferAllocator allocator) throws JsonParseException, IOException {
+ super();
+ this.allocator = allocator;
+ MappingJsonFactory jsonFactory = new MappingJsonFactory(new ObjectMapper()
+ //ignore case for enums
+ .configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS, true)
+ );
+ this.parser = jsonFactory.createParser(inputFile);
+ // Allow reading NaN for floating point values
+ this.parser.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
+ }
+
+ @Override
+ public Dictionary lookup(long id) {
+ if (!started) {
+ throw new IllegalStateException("Unable to lookup until after read() has started");
+ }
+
+ return dictionaries.get(id);
+ }
+
+ /** Reads the beginning (schema section) of the json file and returns it. */
+ public Schema start() throws JsonParseException, IOException {
+ readToken(START_OBJECT);
+ {
+ Schema originalSchema = readNextField("schema", Schema.class);
+ List<Field> fields = new ArrayList<>();
+ dictionaries = new HashMap<>();
+
+ // Convert fields with dictionaries to have the index type
+ for (Field field : originalSchema.getFields()) {
+ fields.add(DictionaryUtility.toMemoryFormat(field, allocator, dictionaries));
+ }
+ this.schema = new Schema(fields, originalSchema.getCustomMetadata());
+
+ if (!dictionaries.isEmpty()) {
+ nextFieldIs("dictionaries");
+ readDictionaryBatches();
+ }
+
+ nextFieldIs("batches");
+ readToken(START_ARRAY);
+ started = true;
+ return this.schema;
+ }
+ }
+
+ private void readDictionaryBatches() throws JsonParseException, IOException {
+ readToken(START_ARRAY);
+ JsonToken token = parser.nextToken();
+ boolean haveDictionaryBatch = token == START_OBJECT;
+ while (haveDictionaryBatch) {
+
+ // Lookup what dictionary for the batch about to be read
+ long id = readNextField("id", Long.class);
+ Dictionary dict = dictionaries.get(id);
+ if (dict == null) {
+ throw new IllegalArgumentException("Dictionary with id: " + id + " missing encoding from schema Field");
+ }
+
+ // Read the dictionary record batch
+ nextFieldIs("data");
+ FieldVector vector = dict.getVector();
+ List<Field> fields = Collections.singletonList(vector.getField());
+ List<FieldVector> vectors = Collections.singletonList(vector);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount());
+ read(root);
+
+ readToken(END_OBJECT);
+ token = parser.nextToken();
+ haveDictionaryBatch = token == START_OBJECT;
+ }
+
+ if (token != END_ARRAY) {
+ throw new IllegalArgumentException("Invalid token: " + token + " expected end of array at " +
+ parser.getTokenLocation());
+ }
+ }
+
+ /**
+ * Reads the next record batch from the file into <code>root</code>.
+ */
+ public boolean read(VectorSchemaRoot root) throws IOException {
+ JsonToken t = parser.nextToken();
+ if (t == START_OBJECT) {
+ {
+ int count = readNextField("count", Integer.class);
+ nextFieldIs("columns");
+ readToken(START_ARRAY);
+ {
+ for (Field field : root.getSchema().getFields()) {
+ FieldVector vector = root.getVector(field);
+ readFromJsonIntoVector(field, vector);
+ }
+ }
+ readToken(END_ARRAY);
+ root.setRowCount(count);
+ }
+ readToken(END_OBJECT);
+ return true;
+ } else if (t == END_ARRAY) {
+ root.setRowCount(0);
+ return false;
+ } else {
+ throw new IllegalArgumentException("Invalid token: " + t);
+ }
+ }
+
+ /**
+ * Returns the next record batch from the file.
+ */
+ public VectorSchemaRoot read() throws IOException {
+ JsonToken t = parser.nextToken();
+ if (t == START_OBJECT) {
+ VectorSchemaRoot recordBatch = VectorSchemaRoot.create(schema, allocator);
+ {
+ int count = readNextField("count", Integer.class);
+ recordBatch.setRowCount(count);
+ nextFieldIs("columns");
+ readToken(START_ARRAY);
+ {
+ for (Field field : schema.getFields()) {
+ FieldVector vector = recordBatch.getVector(field);
+ readFromJsonIntoVector(field, vector);
+ }
+ }
+ readToken(END_ARRAY);
+ }
+ readToken(END_OBJECT);
+ return recordBatch;
+ } else if (t == END_ARRAY) {
+ return null;
+ } else {
+ throw new IllegalArgumentException("Invalid token: " + t);
+ }
+ }
+
+ private abstract class BufferReader {
+ protected abstract ArrowBuf read(BufferAllocator allocator, int count) throws IOException;
+
+ ArrowBuf readBuffer(BufferAllocator allocator, int count) throws IOException {
+ readToken(START_ARRAY);
+ ArrowBuf buf = read(allocator, count);
+ readToken(END_ARRAY);
+ return buf;
+ }
+ }
+
+ private class BufferHelper {
+ BufferReader BIT = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final int bufferSize = BitVectorHelper.getValidityBufferSize(count);
+ ArrowBuf buf = allocator.buffer(bufferSize);
+
+ // C++ integration test fails without this.
+ buf.setZero(0, bufferSize);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ BitVectorHelper.setValidityBit(buf, i, parser.readValueAs(Boolean.class) ? 1 : 0);
+ }
+
+ buf.writerIndex(bufferSize);
+ return buf;
+ }
+ };
+
+ BufferReader DAY_MILLIS = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * IntervalDayVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ readToken(START_OBJECT);
+ buf.writeInt(readNextField("days", Integer.class));
+ buf.writeInt(readNextField("milliseconds", Integer.class));
+ readToken(END_OBJECT);
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader MONTH_DAY_NANOS = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * IntervalMonthDayNanoVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ readToken(START_OBJECT);
+ buf.writeInt(readNextField("months", Integer.class));
+ buf.writeInt(readNextField("days", Integer.class));
+ buf.writeLong(readNextField("nanoseconds", Long.class));
+ readToken(END_OBJECT);
+ }
+
+ return buf;
+ }
+ };
+
+
+ BufferReader INT1 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * TinyIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeByte(parser.getByteValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader INT2 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * SmallIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeShort(parser.getShortValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader INT4 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * IntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeInt(parser.getIntValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader INT8 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * BigIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ String value = parser.getValueAsString();
+ buf.writeLong(Long.valueOf(value));
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader UINT1 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * TinyIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeByte(parser.getShortValue() & 0xFF);
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader UINT2 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * SmallIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeShort(parser.getIntValue() & 0xFFFF);
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader UINT4 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * IntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeInt((int) parser.getLongValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader UINT8 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * BigIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ BigInteger value = new BigInteger(parser.getValueAsString());
+ buf.writeLong(value.longValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader FLOAT4 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * Float4Vector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeFloat(parser.getFloatValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader FLOAT8 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * Float8Vector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeDouble(parser.getDoubleValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader DECIMAL = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * DecimalVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ BigDecimal decimalValue = new BigDecimal(parser.readValueAs(String.class));
+ DecimalUtility.writeBigDecimalToArrowBuf(decimalValue, buf, i, DecimalVector.TYPE_WIDTH);
+ }
+
+ buf.writerIndex(size);
+ return buf;
+ }
+ };
+
+ BufferReader DECIMAL256 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * Decimal256Vector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ BigDecimal decimalValue = new BigDecimal(parser.readValueAs(String.class));
+ DecimalUtility.writeBigDecimalToArrowBuf(decimalValue, buf, i, Decimal256Vector.TYPE_WIDTH);
+ }
+
+ buf.writerIndex(size);
+ return buf;
+ }
+ };
+
+ ArrowBuf readBinaryValues(
+ BufferAllocator allocator, int count) throws IOException {
+ ArrayList<byte[]> values = new ArrayList<>(count);
+ long bufferSize = 0L;
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
+ values.add(value);
+ bufferSize += value.length;
+ }
+
+ ArrowBuf buf = allocator.buffer(bufferSize);
+
+ for (byte[] value : values) {
+ buf.writeBytes(value);
+ }
+
+ return buf;
+ }
+
+ ArrowBuf readStringValues(
+ BufferAllocator allocator, int count) throws IOException {
+ ArrayList<byte[]> values = new ArrayList<>(count);
+ long bufferSize = 0L;
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ final byte[] value = parser.getValueAsString().getBytes(UTF_8);
+ values.add(value);
+ bufferSize += value.length;
+ }
+
+ ArrowBuf buf = allocator.buffer(bufferSize);
+
+ for (byte[] value : values) {
+ buf.writeBytes(value);
+ }
+
+ return buf;
+ }
+
+ BufferReader FIXEDSIZEBINARY = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readBinaryValues(allocator, count);
+ }
+ };
+
+ BufferReader VARCHAR = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readStringValues(allocator, count);
+ }
+ };
+
+ BufferReader LARGEVARCHAR = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readStringValues(allocator, count);
+ }
+ };
+
+ BufferReader VARBINARY = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readBinaryValues(allocator, count);
+ }
+ };
+
+ BufferReader LARGEVARBINARY = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readBinaryValues(allocator, count);
+ }
+ };
+ }
+
+ private ArrowBuf readIntoBuffer(BufferAllocator allocator, BufferType bufferType,
+ Types.MinorType type, int count) throws IOException {
+ ArrowBuf buf;
+
+ BufferHelper helper = new BufferHelper();
+
+ BufferReader reader = null;
+
+ if (bufferType.equals(VALIDITY)) {
+ reader = helper.BIT;
+ } else if (bufferType.equals(OFFSET)) {
+ if (type == Types.MinorType.LARGELIST ||
+ type == Types.MinorType.LARGEVARCHAR ||
+ type == Types.MinorType.LARGEVARBINARY) {
+ reader = helper.INT8;
+ } else {
+ reader = helper.INT4;
+ }
+ } else if (bufferType.equals(TYPE)) {
+ reader = helper.INT1;
+ } else if (bufferType.equals(DATA)) {
+ switch (type) {
+ case BIT:
+ reader = helper.BIT;
+ break;
+ case TINYINT:
+ reader = helper.INT1;
+ break;
+ case SMALLINT:
+ reader = helper.INT2;
+ break;
+ case INT:
+ reader = helper.INT4;
+ break;
+ case BIGINT:
+ reader = helper.INT8;
+ break;
+ case UINT1:
+ reader = helper.UINT1;
+ break;
+ case UINT2:
+ reader = helper.UINT2;
+ break;
+ case UINT4:
+ reader = helper.UINT4;
+ break;
+ case UINT8:
+ reader = helper.UINT8;
+ break;
+ case FLOAT4:
+ reader = helper.FLOAT4;
+ break;
+ case FLOAT8:
+ reader = helper.FLOAT8;
+ break;
+ case DECIMAL:
+ reader = helper.DECIMAL;
+ break;
+ case DECIMAL256:
+ reader = helper.DECIMAL256;
+ break;
+ case FIXEDSIZEBINARY:
+ reader = helper.FIXEDSIZEBINARY;
+ break;
+ case VARCHAR:
+ reader = helper.VARCHAR;
+ break;
+ case LARGEVARCHAR:
+ reader = helper.LARGEVARCHAR;
+ break;
+ case VARBINARY:
+ reader = helper.VARBINARY;
+ break;
+ case LARGEVARBINARY:
+ reader = helper.LARGEVARBINARY;
+ break;
+ case DATEDAY:
+ reader = helper.INT4;
+ break;
+ case DATEMILLI:
+ reader = helper.INT8;
+ break;
+ case TIMESEC:
+ case TIMEMILLI:
+ reader = helper.INT4;
+ break;
+ case TIMEMICRO:
+ case TIMENANO:
+ reader = helper.INT8;
+ break;
+ case TIMESTAMPNANO:
+ case TIMESTAMPMICRO:
+ case TIMESTAMPMILLI:
+ case TIMESTAMPSEC:
+ case TIMESTAMPNANOTZ:
+ case TIMESTAMPMICROTZ:
+ case TIMESTAMPMILLITZ:
+ case TIMESTAMPSECTZ:
+ reader = helper.INT8;
+ break;
+ case INTERVALYEAR:
+ reader = helper.INT4;
+ break;
+ case INTERVALDAY:
+ reader = helper.DAY_MILLIS;
+ break;
+ case INTERVALMONTHDAYNANO:
+ reader = helper.MONTH_DAY_NANOS;
+ break;
+ case DURATION:
+ reader = helper.INT8;
+ break;
+ default:
+ throw new UnsupportedOperationException("Cannot read array of type " + type);
+ }
+ } else {
+ throw new InvalidArrowFileException("Unrecognized buffer type " + bufferType);
+ }
+
+ buf = reader.readBuffer(allocator, count);
+
+ Preconditions.checkNotNull(buf);
+ return buf;
+ }
+
+ private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException {
+ TypeLayout typeLayout = TypeLayout.getTypeLayout(field.getType());
+ List<BufferType> vectorTypes = typeLayout.getBufferTypes();
+ ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()];
+ /*
+ * The order of inner buffers is :
+ * Fixed width vector:
+ * -- validity buffer
+ * -- data buffer
+ * Variable width vector:
+ * -- validity buffer
+ * -- offset buffer
+ * -- data buffer
+ *
+ * This is similar to what getFieldInnerVectors() used to give but now that we don't have
+ * inner vectors anymore, we will work directly at the buffer level -- populate buffers
+ * locally as we read from Json parser and do loadFieldBuffers on the vector followed by
+ * releasing the local buffers.
+ */
+ readToken(START_OBJECT);
+ {
+ // If currently reading dictionaries, field name is not important so don't check
+ String name = readNextField("name", String.class);
+ if (started && !Objects.equals(field.getName(), name)) {
+ throw new IllegalArgumentException("Expected field " + field.getName() + " but got " + name);
+ }
+
+ /* Initialize the vector with required capacity but don't allocateNew since we would
+ * be doing loadFieldBuffers.
+ */
+ int valueCount = readNextField("count", Integer.class);
+ vector.setInitialCapacity(valueCount);
+
+ for (int v = 0; v < vectorTypes.size(); v++) {
+ BufferType bufferType = vectorTypes.get(v);
+ nextFieldIs(bufferType.getName());
+ int innerBufferValueCount = valueCount;
+ if (bufferType.equals(OFFSET) && !field.getType().getTypeID().equals(ArrowType.ArrowTypeID.Union)) {
+ /* offset buffer has 1 additional value capacity */
+ innerBufferValueCount = valueCount + 1;
+ }
+
+ vectorBuffers[v] = readIntoBuffer(allocator, bufferType, vector.getMinorType(), innerBufferValueCount);
+ }
+
+ if (vectorBuffers.length == 0) {
+ readToken(END_OBJECT);
+ return;
+ }
+
+ int nullCount = 0;
+ if (!(vector.getField().getFieldType().getType() instanceof ArrowType.Union)) {
+ nullCount = BitVectorHelper.getNullCount(vectorBuffers[0], valueCount);
+ }
+ final ArrowFieldNode fieldNode = new ArrowFieldNode(valueCount, nullCount);
+ vector.loadFieldBuffers(fieldNode, Arrays.asList(vectorBuffers));
+
+ /* read child vectors (if any) */
+ List<Field> fields = field.getChildren();
+ if (!fields.isEmpty()) {
+ List<FieldVector> vectorChildren = vector.getChildrenFromFields();
+ if (fields.size() != vectorChildren.size()) {
+ throw new IllegalArgumentException(
+ "fields and children are not the same size: " + fields.size() + " != " + vectorChildren.size());
+ }
+ nextFieldIs("children");
+ readToken(START_ARRAY);
+ for (int i = 0; i < fields.size(); i++) {
+ Field childField = fields.get(i);
+ FieldVector childVector = vectorChildren.get(i);
+ readFromJsonIntoVector(childField, childVector);
+ }
+ readToken(END_ARRAY);
+ }
+ }
+ readToken(END_OBJECT);
+
+ for (ArrowBuf buffer: vectorBuffers) {
+ buffer.getReferenceManager().release();
+ }
+ }
+
+ private byte[] decodeHexSafe(String hexString) throws IOException {
+ try {
+ return Hex.decodeHex(hexString.toCharArray());
+ } catch (DecoderException e) {
+ throw new IOException("Unable to decode hex string: " + hexString, e);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ parser.close();
+ for (Dictionary dictionary : dictionaries.values()) {
+ dictionary.getVector().close();
+ }
+ }
+
+ private <T> T readNextField(String expectedFieldName, Class<T> c) throws IOException, JsonParseException {
+ nextFieldIs(expectedFieldName);
+ parser.nextToken();
+ return parser.readValueAs(c);
+ }
+
+ private void nextFieldIs(String expectedFieldName) throws IOException, JsonParseException {
+ String name = parser.nextFieldName();
+ if (name == null || !name.equals(expectedFieldName)) {
+ throw new IllegalStateException("Expected " + expectedFieldName + " but got " + name);
+ }
+ }
+
+ private void readToken(JsonToken expected) throws JsonParseException, IOException {
+ JsonToken t = parser.nextToken();
+ if (t != expected) {
+ throw new IllegalStateException("Expected " + expected + " but got " + t);
+ }
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
new file mode 100644
index 000000000..58760c1a9
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
@@ -0,0 +1,417 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.apache.arrow.vector.BufferLayout.BufferType.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferLayout.BufferType;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.TypeLayout;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.commons.codec.binary.Hex;
+
+import com.fasterxml.jackson.core.JsonEncoding;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter;
+import com.fasterxml.jackson.databind.MappingJsonFactory;
+
+/**
+ * A writer that converts binary Vectors into a JSON format suitable
+ * for integration testing.
+ */
+public class JsonFileWriter implements AutoCloseable {
+
+ /**
+ * Configuration POJO for writing JSON files.
+ */
+ public static final class JSONWriteConfig {
+ private final boolean pretty;
+
+ private JSONWriteConfig(boolean pretty) {
+ this.pretty = pretty;
+ }
+
+ private JSONWriteConfig() {
+ this.pretty = false;
+ }
+
+ public JSONWriteConfig pretty(boolean pretty) {
+ return new JSONWriteConfig(pretty);
+ }
+ }
+
+ public static JSONWriteConfig config() {
+ return new JSONWriteConfig();
+ }
+
+ private final JsonGenerator generator;
+ private Schema schema;
+
+ /**
+ * Constructs a new writer that will output to <code>outputFile</code>.
+ */
+ public JsonFileWriter(File outputFile) throws IOException {
+ this(outputFile, config());
+ }
+
+ /**
+ * Constructs a new writer that will output to <code>outputFile</code> with the given options.
+ */
+ public JsonFileWriter(File outputFile, JSONWriteConfig config) throws IOException {
+ MappingJsonFactory jsonFactory = new MappingJsonFactory();
+ this.generator = jsonFactory.createGenerator(outputFile, JsonEncoding.UTF8);
+ if (config.pretty) {
+ DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
+ prettyPrinter.indentArraysWith(NopIndenter.instance);
+ this.generator.setPrettyPrinter(prettyPrinter);
+ }
+ // Allow writing of floating point NaN values not as strings
+ this.generator.configure(JsonGenerator.Feature.QUOTE_NON_NUMERIC_NUMBERS, false);
+ }
+
+ /**
+ * Writes out the "header" of the file including the schema and any dictionaries required.
+ */
+ public void start(Schema schema, DictionaryProvider provider) throws IOException {
+ List<Field> fields = new ArrayList<>(schema.getFields().size());
+ Set<Long> dictionaryIdsUsed = new HashSet<>();
+ this.schema = schema; // Store original Schema to ensure batches written match
+
+ // Convert fields with dictionaries to have dictionary type
+ for (Field field : schema.getFields()) {
+ fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed));
+ }
+ Schema updatedSchema = new Schema(fields, schema.getCustomMetadata());
+
+ generator.writeStartObject();
+ generator.writeObjectField("schema", updatedSchema);
+
+ // Write all dictionaries that were used
+ if (!dictionaryIdsUsed.isEmpty()) {
+ writeDictionaryBatches(generator, dictionaryIdsUsed, provider);
+ }
+
+ // Start writing of record batches
+ generator.writeArrayFieldStart("batches");
+ }
+
+ private void writeDictionaryBatches(JsonGenerator generator, Set<Long> dictionaryIdsUsed, DictionaryProvider provider)
+ throws IOException {
+ generator.writeArrayFieldStart("dictionaries");
+ for (Long id : dictionaryIdsUsed) {
+ generator.writeStartObject();
+ generator.writeObjectField("id", id);
+
+ generator.writeFieldName("data");
+ Dictionary dictionary = provider.lookup(id);
+ FieldVector vector = dictionary.getVector();
+ List<Field> fields = Collections.singletonList(vector.getField());
+ List<FieldVector> vectors = Collections.singletonList(vector);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount());
+ writeBatch(root);
+
+ generator.writeEndObject();
+ }
+ generator.writeEndArray();
+ }
+
+ /** Writes the record batch to the JSON file. */
+ public void write(VectorSchemaRoot recordBatch) throws IOException {
+ if (!recordBatch.getSchema().equals(schema)) {
+ throw new IllegalArgumentException("record batches must have the same schema: " + schema);
+ }
+ writeBatch(recordBatch);
+ }
+
+ private void writeBatch(VectorSchemaRoot recordBatch) throws IOException {
+ generator.writeStartObject();
+ {
+ generator.writeObjectField("count", recordBatch.getRowCount());
+ generator.writeArrayFieldStart("columns");
+ for (Field field : recordBatch.getSchema().getFields()) {
+ FieldVector vector = recordBatch.getVector(field);
+ writeFromVectorIntoJson(field, vector);
+ }
+ generator.writeEndArray();
+ }
+ generator.writeEndObject();
+ }
+
+ private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException {
+ List<BufferType> vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes();
+ List<ArrowBuf> vectorBuffers = vector.getFieldBuffers();
+ if (vectorTypes.size() != vectorBuffers.size()) {
+ throw new IllegalArgumentException("vector types and inner vector buffers are not the same size: " +
+ vectorTypes.size() + " != " + vectorBuffers.size());
+ }
+ generator.writeStartObject();
+ {
+ generator.writeObjectField("name", field.getName());
+ int valueCount = vector.getValueCount();
+ generator.writeObjectField("count", valueCount);
+
+ for (int v = 0; v < vectorTypes.size(); v++) {
+ BufferType bufferType = vectorTypes.get(v);
+ ArrowBuf vectorBuffer = vectorBuffers.get(v);
+ generator.writeArrayFieldStart(bufferType.getName());
+ final int bufferValueCount = (bufferType.equals(OFFSET) && vector.getMinorType() != MinorType.DENSEUNION) ?
+ valueCount + 1 : valueCount;
+ for (int i = 0; i < bufferValueCount; i++) {
+ if (bufferType.equals(DATA) && (vector.getMinorType() == MinorType.VARCHAR ||
+ vector.getMinorType() == MinorType.VARBINARY)) {
+ writeValueToGenerator(bufferType, vectorBuffer, vectorBuffers.get(v - 1), vector, i);
+ } else if (bufferType.equals(OFFSET) && vector.getValueCount() == 0 &&
+ (vector.getMinorType() == MinorType.VARBINARY || vector.getMinorType() == MinorType.VARCHAR)) {
+ ArrowBuf vectorBufferTmp = vector.getAllocator().buffer(4);
+ vectorBufferTmp.setInt(0, 0);
+ writeValueToGenerator(bufferType, vectorBufferTmp, null, vector, i);
+ vectorBufferTmp.close();
+ } else {
+ writeValueToGenerator(bufferType, vectorBuffer, null, vector, i);
+ }
+ }
+ generator.writeEndArray();
+ }
+ List<Field> fields = field.getChildren();
+ List<FieldVector> children = vector.getChildrenFromFields();
+ if (fields.size() != children.size()) {
+ throw new IllegalArgumentException("fields and children are not the same size: " + fields.size() + " != " +
+ children.size());
+ }
+ if (fields.size() > 0) {
+ generator.writeArrayFieldStart("children");
+ for (int i = 0; i < fields.size(); i++) {
+ Field childField = fields.get(i);
+ FieldVector childVector = children.get(i);
+ writeFromVectorIntoJson(childField, childVector);
+ }
+ generator.writeEndArray();
+ }
+ }
+ generator.writeEndObject();
+ }
+
+ private void writeValueToGenerator(
+ BufferType bufferType,
+ ArrowBuf buffer,
+ ArrowBuf offsetBuffer,
+ FieldVector vector,
+ final int index) throws IOException {
+ if (bufferType.equals(TYPE)) {
+ generator.writeNumber(buffer.getByte(index * TinyIntVector.TYPE_WIDTH));
+ } else if (bufferType.equals(OFFSET)) {
+ generator.writeNumber(buffer.getInt(index * BaseVariableWidthVector.OFFSET_WIDTH));
+ } else if (bufferType.equals(VALIDITY)) {
+ generator.writeNumber(vector.isNull(index) ? 0 : 1);
+ } else if (bufferType.equals(DATA)) {
+ switch (vector.getMinorType()) {
+ case TINYINT:
+ generator.writeNumber(TinyIntVector.get(buffer, index));
+ break;
+ case SMALLINT:
+ generator.writeNumber(SmallIntVector.get(buffer, index));
+ break;
+ case INT:
+ generator.writeNumber(IntVector.get(buffer, index));
+ break;
+ case BIGINT:
+ generator.writeString(String.valueOf(BigIntVector.get(buffer, index)));
+ break;
+ case UINT1:
+ generator.writeNumber(UInt1Vector.getNoOverflow(buffer, index));
+ break;
+ case UINT2:
+ generator.writeNumber(UInt2Vector.get(buffer, index));
+ break;
+ case UINT4:
+ generator.writeNumber(UInt4Vector.getNoOverflow(buffer, index));
+ break;
+ case UINT8:
+ generator.writeString(UInt8Vector.getNoOverflow(buffer, index).toString());
+ break;
+ case FLOAT4:
+ generator.writeNumber(Float4Vector.get(buffer, index));
+ break;
+ case FLOAT8:
+ generator.writeNumber(Float8Vector.get(buffer, index));
+ break;
+ case DATEDAY:
+ generator.writeNumber(DateDayVector.get(buffer, index));
+ break;
+ case DATEMILLI:
+ generator.writeNumber(DateMilliVector.get(buffer, index));
+ break;
+ case TIMESEC:
+ generator.writeNumber(TimeSecVector.get(buffer, index));
+ break;
+ case TIMEMILLI:
+ generator.writeNumber(TimeMilliVector.get(buffer, index));
+ break;
+ case TIMEMICRO:
+ generator.writeNumber(TimeMicroVector.get(buffer, index));
+ break;
+ case TIMENANO:
+ generator.writeNumber(TimeNanoVector.get(buffer, index));
+ break;
+ case TIMESTAMPSEC:
+ generator.writeNumber(TimeStampSecVector.get(buffer, index));
+ break;
+ case TIMESTAMPMILLI:
+ generator.writeNumber(TimeStampMilliVector.get(buffer, index));
+ break;
+ case TIMESTAMPMICRO:
+ generator.writeNumber(TimeStampMicroVector.get(buffer, index));
+ break;
+ case TIMESTAMPNANO:
+ generator.writeNumber(TimeStampNanoVector.get(buffer, index));
+ break;
+ case TIMESTAMPSECTZ:
+ generator.writeNumber(TimeStampSecTZVector.get(buffer, index));
+ break;
+ case TIMESTAMPMILLITZ:
+ generator.writeNumber(TimeStampMilliTZVector.get(buffer, index));
+ break;
+ case TIMESTAMPMICROTZ:
+ generator.writeNumber(TimeStampMicroTZVector.get(buffer, index));
+ break;
+ case TIMESTAMPNANOTZ:
+ generator.writeNumber(TimeStampNanoTZVector.get(buffer, index));
+ break;
+ case DURATION:
+ generator.writeNumber(DurationVector.get(buffer, index));
+ break;
+ case INTERVALYEAR:
+ generator.writeNumber(IntervalYearVector.getTotalMonths(buffer, index));
+ break;
+ case INTERVALDAY:
+ generator.writeStartObject();
+ generator.writeObjectField("days", IntervalDayVector.getDays(buffer, index));
+ generator.writeObjectField("milliseconds", IntervalDayVector.getMilliseconds(buffer, index));
+ generator.writeEndObject();
+ break;
+ case INTERVALMONTHDAYNANO:
+ generator.writeStartObject();
+ generator.writeObjectField("months", IntervalMonthDayNanoVector.getMonths(buffer, index));
+ generator.writeObjectField("days", IntervalMonthDayNanoVector.getDays(buffer, index));
+ generator.writeObjectField("nanoseconds", IntervalMonthDayNanoVector.getNanoseconds(buffer, index));
+ generator.writeEndObject();
+ break;
+ case BIT:
+ generator.writeNumber(BitVectorHelper.get(buffer, index));
+ break;
+ case VARBINARY: {
+ Preconditions.checkNotNull(offsetBuffer);
+ String hexString = Hex.encodeHexString(BaseVariableWidthVector.get(buffer,
+ offsetBuffer, index));
+ generator.writeObject(hexString);
+ break;
+ }
+ case FIXEDSIZEBINARY:
+ int byteWidth = ((FixedSizeBinaryVector) vector).getByteWidth();
+ String fixedSizeHexString = Hex.encodeHexString(FixedSizeBinaryVector.get(buffer, index, byteWidth));
+ generator.writeObject(fixedSizeHexString);
+ break;
+ case VARCHAR: {
+ Preconditions.checkNotNull(offsetBuffer);
+ byte[] b = (BaseVariableWidthVector.get(buffer, offsetBuffer, index));
+ generator.writeString(new String(b, "UTF-8"));
+ break;
+ }
+ case DECIMAL: {
+ int scale = ((DecimalVector) vector).getScale();
+ BigDecimal decimalValue = DecimalUtility.getBigDecimalFromArrowBuf(buffer, index, scale,
+ DecimalVector.TYPE_WIDTH);
+ // We write the unscaled value, because the scale is stored in the type metadata.
+ generator.writeString(decimalValue.unscaledValue().toString());
+ break;
+ }
+ case DECIMAL256: {
+ int scale = ((Decimal256Vector) vector).getScale();
+ BigDecimal decimalValue = DecimalUtility.getBigDecimalFromArrowBuf(buffer, index, scale,
+ Decimal256Vector.TYPE_WIDTH);
+ // We write the unscaled value, because the scale is stored in the type metadata.
+ generator.writeString(decimalValue.unscaledValue().toString());
+ break;
+ }
+
+ default:
+ throw new UnsupportedOperationException("minor type: " + vector.getMinorType());
+ }
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ generator.writeEndArray();
+ generator.writeEndObject();
+ generator.close();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java
new file mode 100644
index 000000000..db79661a8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.ReadableByteChannel;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Adapter around {@link ReadableByteChannel} that reads into {@linkplain ArrowBuf}s.
+ */
+public class ReadChannel implements AutoCloseable {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(ReadChannel.class);
+
+ private ReadableByteChannel in;
+ private long bytesRead = 0;
+
+ public ReadChannel(ReadableByteChannel in) {
+ this.in = in;
+ }
+
+ public long bytesRead() {
+ return bytesRead;
+ }
+
+ /**
+ * Reads bytes into buffer until it is full (buffer.remaining() == 0). Returns the
+ * number of bytes read which can be less than full if there are no more.
+ *
+ * @param buffer The buffer to read to
+ * @return the number of byte read
+ * @throws IOException if nit enough bytes left to read
+ */
+ public int readFully(ByteBuffer buffer) throws IOException {
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("Reading buffer with size: {}", buffer.remaining());
+ }
+ int totalRead = 0;
+ while (buffer.remaining() != 0) {
+ int read = in.read(buffer);
+ if (read == -1) {
+ this.bytesRead += totalRead;
+ return totalRead;
+ }
+ totalRead += read;
+ if (read == 0) {
+ break;
+ }
+ }
+ this.bytesRead += totalRead;
+ return totalRead;
+ }
+
+ /**
+ * Reads up to len into buffer. Returns bytes read.
+ *
+ * @param buffer the buffer to read to
+ * @param length the amount of bytes to read
+ * @return the number of bytes read
+ * @throws IOException if nit enough bytes left to read
+ */
+ public long readFully(ArrowBuf buffer, long length) throws IOException {
+ boolean fullRead = true;
+ long bytesLeft = length;
+ while (fullRead && bytesLeft > 0) {
+ int bytesToRead = (int) Math.min(bytesLeft, Integer.MAX_VALUE);
+ int n = readFully(buffer.nioBuffer(buffer.writerIndex(), bytesToRead));
+ buffer.writerIndex(buffer.writerIndex() + n);
+ fullRead = n == bytesToRead;
+ bytesLeft -= n;
+ }
+ return length - bytesLeft;
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (this.in != null) {
+ in.close();
+ in = null;
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java
new file mode 100644
index 000000000..4b6e0ed76
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.channels.SeekableByteChannel;
+
+/**
+ * An {@link ReadChannel} that supports seeking to a
+ * random position.
+ */
+public class SeekableReadChannel extends ReadChannel {
+
+ private final SeekableByteChannel in;
+
+ public SeekableReadChannel(SeekableByteChannel in) {
+ super(in);
+ this.in = in;
+ }
+
+ public void setPosition(long position) throws IOException {
+ in.position(position);
+ }
+
+ public long size() throws IOException {
+ return in.size();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java
new file mode 100644
index 000000000..9ad71f6fe
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.WritableByteChannel;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.ipc.message.FBSerializable;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Wrapper around a WritableByteChannel that maintains the position as well adding
+ * some common serialization utilities.
+ *
+ * <p>All write methods in this class follow full write semantics, i.e., write calls
+ * only return after requested data has been fully written. Note this is different
+ * from java WritableByteChannel interface where partial write is allowed
+ * </p>
+ * <p>
+ * Please note that objects of this class are not thread-safe.
+ * </p>
+ */
+public class WriteChannel implements AutoCloseable {
+ private static final Logger LOGGER = LoggerFactory.getLogger(WriteChannel.class);
+
+ private static final byte[] ZERO_BYTES = new byte[8];
+
+ private final byte[] intBuf = new byte[4];
+
+ private long currentPosition = 0;
+
+ private final WritableByteChannel out;
+
+ public WriteChannel(WritableByteChannel out) {
+ this.out = out;
+ }
+
+ @Override
+ public void close() throws IOException {
+ out.close();
+ }
+
+ public long getCurrentPosition() {
+ return currentPosition;
+ }
+
+ public long write(byte[] buffer) throws IOException {
+ return write(ByteBuffer.wrap(buffer));
+ }
+
+ long write(byte[] buffer, int offset, int length) throws IOException {
+ return write(ByteBuffer.wrap(buffer, offset, length));
+ }
+
+ /**
+ * Writes <zeroCount>zeroCount</zeroCount> zeros the underlying channel.
+ */
+ public long writeZeros(long zeroCount) throws IOException {
+ long bytesWritten = 0;
+ long wholeWordsEnd = zeroCount - 8;
+ while (bytesWritten <= wholeWordsEnd) {
+ bytesWritten += write(ZERO_BYTES);
+ }
+
+ if (bytesWritten < zeroCount) {
+ bytesWritten += write(ZERO_BYTES, 0, (int) (zeroCount - bytesWritten));
+ }
+ return bytesWritten;
+ }
+
+ /**
+ * Writes enough bytes to align the channel to an 8-byte boundary.
+ */
+ public long align() throws IOException {
+ int trailingByteSize = (int) (currentPosition % 8);
+ if (trailingByteSize != 0) { // align on 8 byte boundaries
+ return writeZeros(8 - trailingByteSize);
+ }
+ return 0;
+ }
+
+ /**
+ * Writes all data from <code>buffer</code> to the underlying channel.
+ */
+ public long write(ByteBuffer buffer) throws IOException {
+ long length = buffer.remaining();
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("Writing buffer with size: {}", length);
+ }
+ while (buffer.hasRemaining()) {
+ out.write(buffer);
+ }
+ currentPosition += length;
+ return length;
+ }
+
+ /**
+ * Writes <code>v</code> in little-endian format to the underlying channel.
+ */
+ public long writeIntLittleEndian(int v) throws IOException {
+ MessageSerializer.intToBytes(v, intBuf);
+ return write(intBuf);
+ }
+
+ /**
+ * Writes the buffer to the underlying channel.
+ */
+ public void write(ArrowBuf buffer) throws IOException {
+ long bytesWritten = 0;
+ while (bytesWritten < buffer.readableBytes()) {
+ int bytesToWrite = (int) Math.min(Integer.MAX_VALUE, buffer.readableBytes() - bytesWritten);
+ ByteBuffer nioBuffer = buffer.nioBuffer(buffer.readerIndex() + bytesWritten,
+ bytesToWrite);
+ write(nioBuffer);
+ bytesWritten += bytesToWrite;
+ }
+
+ }
+
+ /**
+ * Writes the serialized flatbuffer to the underlying channel. If withSizePrefix
+ * is true then the length in bytes of the buffer will first be written in little endian format.
+ */
+ public long write(FBSerializable writer, boolean withSizePrefix) throws IOException {
+ ByteBuffer buffer = serialize(writer);
+ if (withSizePrefix) {
+ writeIntLittleEndian(buffer.remaining());
+ }
+ return write(buffer);
+ }
+
+ /**
+ * Serializes writer to a ByteBuffer.
+ */
+ public static ByteBuffer serialize(FBSerializable writer) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int root = writer.writeTo(builder);
+ builder.finish(root);
+ return builder.dataBuffer();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java
new file mode 100644
index 000000000..a235102ce
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.Block;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/** Metadata for an arrow message in a channel. */
+public class ArrowBlock implements FBSerializable {
+
+ private final long offset;
+ private final int metadataLength;
+ private final long bodyLength;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param offset The offset into the channel file where the block was written.
+ * @param metadataLength The length of the flatbuffer metadata in the block.
+ * @param bodyLength The length of data in the block.
+ */
+ public ArrowBlock(long offset, int metadataLength, long bodyLength) {
+ super();
+ this.offset = offset;
+ this.metadataLength = metadataLength;
+ this.bodyLength = bodyLength;
+ }
+
+ public long getOffset() {
+ return offset;
+ }
+
+ public int getMetadataLength() {
+ return metadataLength;
+ }
+
+ public long getBodyLength() {
+ return bodyLength;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ return Block.createBlock(builder, offset, metadataLength, bodyLength);
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + (int) (bodyLength ^ (bodyLength >>> 32));
+ result = prime * result + metadataLength;
+ result = prime * result + (int) (offset ^ (offset >>> 32));
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ ArrowBlock other = (ArrowBlock) obj;
+ if (bodyLength != other.bodyLength) {
+ return false;
+ }
+ if (metadataLength != other.metadataLength) {
+ return false;
+ }
+ if (offset != other.offset) {
+ return false;
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java
new file mode 100644
index 000000000..5370ddfa0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.BodyCompression;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Compression information about data written to a channel.
+ */
+public class ArrowBodyCompression implements FBSerializable {
+
+ private final byte codec;
+
+ private final byte method;
+
+ public ArrowBodyCompression(byte codec, byte method) {
+ this.codec = codec;
+ this.method = method;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ return BodyCompression.createBodyCompression(builder, codec, method);
+ }
+
+ public byte getCodec() {
+ return codec;
+ }
+
+ public byte getMethod() {
+ return method;
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowBodyCompression [codec=" + codec + ", method=" + method + "]";
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java
new file mode 100644
index 000000000..d3aec6fb7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.Buffer;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/** Metadata for a buffer written to a channel. */
+public class ArrowBuffer implements FBSerializable {
+
+ private long offset;
+ private long size;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param offset The offset to the start of the buffer in the channel.
+ * @param size The size of the buffer.
+ */
+ public ArrowBuffer(long offset, long size) {
+ super();
+ this.offset = offset;
+ this.size = size;
+ }
+
+ public long getOffset() {
+ return offset;
+ }
+
+ public long getSize() {
+ return size;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + (int) (offset ^ (offset >>> 32));
+ result = prime * result + (int) (size ^ (size >>> 32));
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ ArrowBuffer other = (ArrowBuffer) obj;
+ if (offset != other.offset) {
+ return false;
+ }
+ if (size != other.size) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ return Buffer.createBuffer(builder, offset, size);
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowBuffer [offset=" + offset + ", size=" + size + "]";
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java
new file mode 100644
index 000000000..cac2a1cb8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.DictionaryBatch;
+import org.apache.arrow.flatbuf.MessageHeader;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * POJO wrapper around a Dictionary Batch IPC messages
+ * (https://arrow.apache.org/docs/format/IPC.html#dictionary-batches)
+ */
+public class ArrowDictionaryBatch implements ArrowMessage {
+
+ private final long dictionaryId;
+ private final ArrowRecordBatch dictionary;
+ private final boolean isDelta;
+
+ @Deprecated
+ public ArrowDictionaryBatch(long dictionaryId, ArrowRecordBatch dictionary) {
+ this (dictionaryId, dictionary, false);
+ }
+
+ /**
+ * Constructs new instance.
+ */
+ public ArrowDictionaryBatch(long dictionaryId, ArrowRecordBatch dictionary, boolean isDelta) {
+ this.dictionaryId = dictionaryId;
+ this.dictionary = dictionary;
+ this.isDelta = isDelta;
+ }
+
+ public boolean isDelta() {
+ return isDelta;
+ }
+
+ public byte getMessageType() {
+ return MessageHeader.DictionaryBatch;
+ }
+
+ public long getDictionaryId() {
+ return dictionaryId;
+ }
+
+ public ArrowRecordBatch getDictionary() {
+ return dictionary;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ int dataOffset = dictionary.writeTo(builder);
+ DictionaryBatch.startDictionaryBatch(builder);
+ DictionaryBatch.addId(builder, dictionaryId);
+ DictionaryBatch.addData(builder, dataOffset);
+ DictionaryBatch.addIsDelta(builder, isDelta);
+ return DictionaryBatch.endDictionaryBatch(builder);
+ }
+
+ @Override
+ public long computeBodyLength() {
+ return dictionary.computeBodyLength();
+ }
+
+ @Override
+ public <T> T accepts(ArrowMessageVisitor<T> visitor) {
+ return visitor.visit(this);
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowDictionaryBatch [dictionaryId=" + dictionaryId + ", dictionary=" + dictionary + "]";
+ }
+
+ @Override
+ public void close() {
+ dictionary.close();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java
new file mode 100644
index 000000000..9ce5e2e4d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import org.apache.arrow.flatbuf.FieldNode;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Metadata about Vectors/Arrays that is written to a channel.
+ */
+public class ArrowFieldNode implements FBSerializable {
+
+ private final int length;
+ private final int nullCount;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param length The number of values written.
+ * @param nullCount The number of null values.
+ */
+ public ArrowFieldNode(long length, long nullCount) {
+ super();
+ this.length = checkedCastToInt(length);
+ this.nullCount = checkedCastToInt(nullCount);
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ return FieldNode.createFieldNode(builder, length, nullCount);
+ }
+
+ public int getNullCount() {
+ return nullCount;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowFieldNode [length=" + length + ", nullCount=" + nullCount + "]";
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java
new file mode 100644
index 000000000..567fabc1d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import static org.apache.arrow.vector.ipc.message.FBSerializables.writeAllStructsToVector;
+import static org.apache.arrow.vector.ipc.message.FBSerializables.writeKeyValues;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.flatbuf.Block;
+import org.apache.arrow.flatbuf.Footer;
+import org.apache.arrow.flatbuf.KeyValue;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/** Footer metadata for the arrow file format. */
+public class ArrowFooter implements FBSerializable {
+
+ private final Schema schema;
+
+ private final List<ArrowBlock> dictionaries;
+
+ private final List<ArrowBlock> recordBatches;
+
+ private final Map<String, String> metaData;
+
+ private final MetadataVersion metadataVersion;
+
+ public ArrowFooter(Schema schema, List<ArrowBlock> dictionaries, List<ArrowBlock> recordBatches) {
+ this(schema, dictionaries, recordBatches, null);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param schema The schema for record batches in the file.
+ * @param dictionaries The dictionaries relevant to the file.
+ * @param recordBatches The recordBatches written to the file.
+ * @param metaData user-defined k-v meta data.
+ */
+ public ArrowFooter(
+ Schema schema,
+ List<ArrowBlock> dictionaries,
+ List<ArrowBlock> recordBatches,
+ Map<String, String> metaData) {
+ this(schema, dictionaries, recordBatches, metaData, MetadataVersion.DEFAULT);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param schema The schema for record batches in the file.
+ * @param dictionaries The dictionaries relevant to the file.
+ * @param recordBatches The recordBatches written to the file.
+ * @param metaData user-defined k-v meta data.
+ * @param metadataVersion The Arrow metadata version.
+ */
+ public ArrowFooter(
+ Schema schema,
+ List<ArrowBlock> dictionaries,
+ List<ArrowBlock> recordBatches,
+ Map<String, String> metaData,
+ MetadataVersion metadataVersion) {
+ this.schema = schema;
+ this.dictionaries = dictionaries;
+ this.recordBatches = recordBatches;
+ this.metaData = metaData;
+ this.metadataVersion = metadataVersion;
+ }
+
+ /**
+ * Constructs from the corresponding Flatbuffer message.
+ */
+ public ArrowFooter(Footer footer) {
+ this(
+ Schema.convertSchema(footer.schema()),
+ dictionaries(footer),
+ recordBatches(footer),
+ metaData(footer),
+ MetadataVersion.fromFlatbufID(footer.version())
+ );
+ }
+
+ private static List<ArrowBlock> recordBatches(Footer footer) {
+ List<ArrowBlock> recordBatches = new ArrayList<>();
+ Block tempBlock = new Block();
+ int recordBatchesLength = footer.recordBatchesLength();
+ for (int i = 0; i < recordBatchesLength; i++) {
+ Block block = footer.recordBatches(tempBlock, i);
+ recordBatches.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength()));
+ }
+ return recordBatches;
+ }
+
+ private static List<ArrowBlock> dictionaries(Footer footer) {
+ List<ArrowBlock> dictionaries = new ArrayList<>();
+ Block tempBlock = new Block();
+
+ int dictionariesLength = footer.dictionariesLength();
+ for (int i = 0; i < dictionariesLength; i++) {
+ Block block = footer.dictionaries(tempBlock, i);
+ dictionaries.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength()));
+ }
+ return dictionaries;
+ }
+
+ private static Map<String, String> metaData(Footer footer) {
+ Map<String, String> metaData = new HashMap<>();
+
+ int metaDataLength = footer.customMetadataLength();
+ for (int i = 0; i < metaDataLength; i++) {
+ KeyValue kv = footer.customMetadata(i);
+ metaData.put(kv.key(), kv.value());
+ }
+
+ return metaData;
+ }
+
+ public Schema getSchema() {
+ return schema;
+ }
+
+ public List<ArrowBlock> getDictionaries() {
+ return dictionaries;
+ }
+
+ public List<ArrowBlock> getRecordBatches() {
+ return recordBatches;
+ }
+
+ public Map<String, String> getMetaData() {
+ return metaData;
+ }
+
+ public MetadataVersion getMetadataVersion() {
+ return metadataVersion;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ int schemaIndex = schema.getSchema(builder);
+ Footer.startDictionariesVector(builder, dictionaries.size());
+ int dicsOffset = writeAllStructsToVector(builder, dictionaries);
+ Footer.startRecordBatchesVector(builder, recordBatches.size());
+ int rbsOffset = writeAllStructsToVector(builder, recordBatches);
+
+ int metaDataOffset = 0;
+ if (metaData != null) {
+ metaDataOffset = writeKeyValues(builder, metaData);
+ }
+
+ Footer.startFooter(builder);
+ Footer.addSchema(builder, schemaIndex);
+ Footer.addDictionaries(builder, dicsOffset);
+ Footer.addRecordBatches(builder, rbsOffset);
+ Footer.addCustomMetadata(builder, metaDataOffset);
+ Footer.addVersion(builder, metadataVersion.toFlatbufID());
+ return Footer.endFooter(builder);
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((dictionaries == null) ? 0 : dictionaries.hashCode());
+ result = prime * result + ((recordBatches == null) ? 0 : recordBatches.hashCode());
+ result = prime * result + ((schema == null) ? 0 : schema.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ ArrowFooter other = (ArrowFooter) obj;
+ if (dictionaries == null) {
+ if (other.dictionaries != null) {
+ return false;
+ }
+ } else if (!dictionaries.equals(other.dictionaries)) {
+ return false;
+ }
+ if (recordBatches == null) {
+ if (other.recordBatches != null) {
+ return false;
+ }
+ } else if (!recordBatches.equals(other.recordBatches)) {
+ return false;
+ }
+ if (schema == null) {
+ if (other.schema != null) {
+ return false;
+ }
+ } else if (!schema.equals(other.schema)) {
+ return false;
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java
new file mode 100644
index 000000000..4cbc87b4e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+/**
+ * Interface for Arrow IPC messages (https://arrow.apache.org/docs/format/IPC.html).
+ */
+public interface ArrowMessage extends FBSerializable, AutoCloseable {
+
+ long computeBodyLength();
+
+ <T> T accepts(ArrowMessageVisitor<T> visitor);
+
+ /** Returns the flatbuffer enum value indicating the type of the message. */
+ byte getMessageType();
+
+ /**
+ * Visitor interface for implementations of {@link ArrowMessage}.
+ *
+ * @param <T> The type of value to return after visiting.
+ */
+ interface ArrowMessageVisitor<T> {
+ T visit(ArrowDictionaryBatch message);
+
+ T visit(ArrowRecordBatch message);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java
new file mode 100644
index 000000000..dbf2774fb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * POJO representation of a RecordBatch IPC message (https://arrow.apache.org/docs/format/IPC.html).
+ */
+public class ArrowRecordBatch implements ArrowMessage {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(ArrowRecordBatch.class);
+
+ /**
+ * Number of records.
+ */
+ private final int length;
+
+ /**
+ * Nodes correspond to the pre-ordered flattened logical schema.
+ */
+ private final List<ArrowFieldNode> nodes;
+
+ private final List<ArrowBuf> buffers;
+
+ private final ArrowBodyCompression bodyCompression;
+
+ private final List<ArrowBuffer> buffersLayout;
+
+ private boolean closed = false;
+
+ public ArrowRecordBatch(
+ int length, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
+ this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, true);
+ }
+
+ public ArrowRecordBatch(
+ int length, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers,
+ ArrowBodyCompression bodyCompression) {
+ this(length, nodes, buffers, bodyCompression, true);
+ }
+
+ /**
+ * Construct a record batch from nodes.
+ *
+ * @param length how many rows in this batch
+ * @param nodes field level info
+ * @param buffers will be retained until this recordBatch is closed
+ * @param bodyCompression compression info.
+ */
+ public ArrowRecordBatch(
+ int length, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers,
+ ArrowBodyCompression bodyCompression, boolean alignBuffers) {
+ super();
+ this.length = length;
+ this.nodes = nodes;
+ this.buffers = buffers;
+ Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null");
+ this.bodyCompression = bodyCompression;
+ List<ArrowBuffer> arrowBuffers = new ArrayList<>(buffers.size());
+ long offset = 0;
+ for (ArrowBuf arrowBuf : buffers) {
+ arrowBuf.getReferenceManager().retain();
+ long size = arrowBuf.readableBytes();
+ arrowBuffers.add(new ArrowBuffer(offset, size));
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("Buffer in RecordBatch at {}, length: {}", offset, size);
+ }
+ offset += size;
+ if (alignBuffers) { // align on 8 byte boundaries
+ offset = DataSizeRoundingUtil.roundUpTo8Multiple(offset);
+ }
+ }
+ this.buffersLayout = Collections.unmodifiableList(arrowBuffers);
+ }
+
+ // clone constructor
+ // this constructor is different from the public ones in that the reference manager's
+ // <code>retain</code> method is not called, so the first <code>dummy</code> parameter is used
+ // to distinguish this from the public constructor.
+ private ArrowRecordBatch(
+ boolean dummy, int length, List<ArrowFieldNode> nodes,
+ List<ArrowBuf> buffers, ArrowBodyCompression bodyCompression) {
+ this.length = length;
+ this.nodes = nodes;
+ this.buffers = buffers;
+ Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null");
+ this.bodyCompression = bodyCompression;
+ this.closed = false;
+ List<ArrowBuffer> arrowBuffers = new ArrayList<>();
+ long offset = 0;
+ for (ArrowBuf arrowBuf : buffers) {
+ long size = arrowBuf.readableBytes();
+ arrowBuffers.add(new ArrowBuffer(offset, size));
+ offset += size;
+ }
+ this.buffersLayout = Collections.unmodifiableList(arrowBuffers);
+ }
+
+ public byte getMessageType() {
+ return org.apache.arrow.flatbuf.MessageHeader.RecordBatch;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ public ArrowBodyCompression getBodyCompression() {
+ return bodyCompression;
+ }
+
+ /**
+ * Get the nodes in this record batch.
+ *
+ * @return the FieldNodes corresponding to the schema
+ */
+ public List<ArrowFieldNode> getNodes() {
+ return nodes;
+ }
+
+ /**
+ * Get the record batch buffers.
+ *
+ * @return the buffers containing the data
+ */
+ public List<ArrowBuf> getBuffers() {
+ if (closed) {
+ throw new IllegalStateException("already closed");
+ }
+ return buffers;
+ }
+
+ /**
+ * Create a new ArrowRecordBatch which has the same information as this batch but whose buffers
+ * are owned by that Allocator.
+ *
+ * <p>This will also close this record batch and make it no longer useful.
+ *
+ * @return A cloned ArrowRecordBatch
+ */
+ public ArrowRecordBatch cloneWithTransfer(final BufferAllocator allocator) {
+ final List<ArrowBuf> newBufs = buffers.stream()
+ .map(buf ->
+ (buf.getReferenceManager().transferOwnership(buf, allocator)
+ .getTransferredBuffer())
+ .writerIndex(buf.writerIndex()))
+ .collect(Collectors.toList());
+ close();
+ return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression);
+ }
+
+ /**
+ * Get the serialized layout.
+ *
+ * @return the serialized layout if we send the buffers on the wire
+ */
+ public List<ArrowBuffer> getBuffersLayout() {
+ return buffersLayout;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ RecordBatch.startNodesVector(builder, nodes.size());
+ int nodesOffset = FBSerializables.writeAllStructsToVector(builder, nodes);
+ RecordBatch.startBuffersVector(builder, buffers.size());
+ int buffersOffset = FBSerializables.writeAllStructsToVector(builder, buffersLayout);
+ int compressOffset = 0;
+ if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) {
+ compressOffset = bodyCompression.writeTo(builder);
+ }
+ RecordBatch.startRecordBatch(builder);
+ RecordBatch.addLength(builder, length);
+ RecordBatch.addNodes(builder, nodesOffset);
+ RecordBatch.addBuffers(builder, buffersOffset);
+ if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) {
+ RecordBatch.addCompression(builder, compressOffset);
+ }
+ return RecordBatch.endRecordBatch(builder);
+ }
+
+ @Override
+ public <T> T accepts(ArrowMessageVisitor<T> visitor) {
+ return visitor.visit(this);
+ }
+
+ /**
+ * Releases the buffers.
+ */
+ @Override
+ public void close() {
+ if (!closed) {
+ closed = true;
+ for (ArrowBuf arrowBuf : buffers) {
+ arrowBuf.getReferenceManager().release();
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowRecordBatch [length=" + length + ", nodes=" + nodes + ", #buffers=" + buffers.size() +
+ ", buffersLayout=" + buffersLayout + ", closed=" + closed + "]";
+ }
+
+ /**
+ * Computes the size of the serialized body for this recordBatch.
+ */
+ @Override
+ public long computeBodyLength() {
+ long size = 0;
+
+ List<ArrowBuf> buffers = getBuffers();
+ List<ArrowBuffer> buffersLayout = getBuffersLayout();
+ if (buffers.size() != buffersLayout.size()) {
+ throw new IllegalStateException("the layout does not match: " +
+ buffers.size() + " != " + buffersLayout.size());
+ }
+
+ for (int i = 0; i < buffers.size(); i++) {
+ ArrowBuf buffer = buffers.get(i);
+ ArrowBuffer layout = buffersLayout.get(i);
+ size = layout.getOffset() + buffer.readableBytes();
+
+ // round up size to the next multiple of 8
+ size = DataSizeRoundingUtil.roundUpTo8Multiple(size);
+ }
+ return size;
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java
new file mode 100644
index 000000000..6b406b594
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Interface for serializing to FlatBuffers.
+ */
+public interface FBSerializable {
+ /**
+ * Returns the number of bytes taken to serialize the data in builder after writing to it.
+ */
+ int writeTo(FlatBufferBuilder builder);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java
new file mode 100644
index 000000000..26736ed91
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.flatbuf.KeyValue;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Utility methods for {@linkplain org.apache.arrow.vector.ipc.message.FBSerializable}s.
+ */
+public class FBSerializables {
+ private FBSerializables() {}
+
+ /**
+ * Writes every element of all to builder and calls {@link FlatBufferBuilder#endVector()} afterwards.
+ * Returns the number of result of calling endVector.
+ */
+ public static int writeAllStructsToVector(FlatBufferBuilder builder, List<? extends FBSerializable> all) {
+ // struct vectors have to be created in reverse order
+ List<? extends FBSerializable> reversed = new ArrayList<>(all);
+ Collections.reverse(reversed);
+ for (FBSerializable element : reversed) {
+ element.writeTo(builder);
+ }
+ return builder.endVector();
+ }
+
+ /**
+ * Writes map data with string type.
+ */
+ public static int writeKeyValues(FlatBufferBuilder builder, Map<String, String> metaData) {
+ int[] metadataOffsets = new int[metaData.size()];
+ Iterator<Map.Entry<String, String>> metadataIterator = metaData.entrySet().iterator();
+ for (int i = 0; i < metadataOffsets.length; i++) {
+ Map.Entry<String, String> kv = metadataIterator.next();
+ int keyOffset = builder.createString(kv.getKey());
+ int valueOffset = builder.createString(kv.getValue());
+ KeyValue.startKeyValue(builder);
+ KeyValue.addKey(builder, keyOffset);
+ KeyValue.addValue(builder, valueOffset);
+ metadataOffsets[i] = KeyValue.endKeyValue(builder);
+ }
+ return org.apache.arrow.flatbuf.Field.createCustomMetadataVector(builder, metadataOffsets);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java
new file mode 100644
index 000000000..51207584f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.vector.types.MetadataVersion;
+
+/**
+ * IPC options, now only use for write.
+ */
+public class IpcOption {
+
+ // Write the pre-0.15.0 encapsulated IPC message format
+ // consisting of a 4-byte prefix instead of 8 byte
+ public final boolean write_legacy_ipc_format;
+
+ // The metadata version. Defaults to V5.
+ public final MetadataVersion metadataVersion;
+
+ public IpcOption() {
+ this(false, MetadataVersion.DEFAULT);
+ }
+
+ public IpcOption(boolean writeLegacyIpcFormat, MetadataVersion metadataVersion) {
+ this.write_legacy_ipc_format = writeLegacyIpcFormat;
+ this.metadataVersion = metadataVersion;
+ }
+
+ public static final IpcOption DEFAULT = new IpcOption();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java
new file mode 100644
index 000000000..1c7968d7f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import java.io.IOException;
+
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.ReadChannel;
+
+/**
+ * Reads a sequence of messages using a ReadChannel.
+ */
+public class MessageChannelReader implements AutoCloseable {
+ protected ReadChannel in;
+ protected BufferAllocator allocator;
+
+ /**
+ * Construct a MessageReader to read streaming messages from an existing ReadChannel.
+ *
+ * @param in Channel to read messages from
+ * @param allocator BufferAllocator used to read Message body into an ArrowBuf.
+ */
+ public MessageChannelReader(ReadChannel in, BufferAllocator allocator) {
+ this.in = in;
+ this.allocator = allocator;
+ }
+
+ /**
+ * Read a message from the ReadChannel and return a MessageResult containing the Message
+ * metadata and optional message body data. Once the end-of-stream has been reached, a null
+ * value will be returned. If the message has no body, then MessageResult.getBodyBuffer()
+ * returns null.
+ *
+ * @return MessageResult or null if reached end-of-stream
+ * @throws IOException on error
+ */
+ public MessageResult readNext() throws IOException {
+
+ // Read the flatbuf message and check for end-of-stream
+ MessageMetadataResult result = MessageSerializer.readMessage(in);
+ if (result == null) {
+ return null;
+ }
+ Message message = result.getMessage();
+ ArrowBuf bodyBuffer = null;
+
+ // Read message body data if defined in message
+ if (result.messageHasBody()) {
+ long bodyLength = result.getMessageBodyLength();
+ bodyBuffer = MessageSerializer.readMessageBody(in, bodyLength, allocator);
+ }
+
+ return new MessageResult(message, bodyBuffer);
+ }
+
+ /**
+ * Get the number of bytes read from the ReadChannel.
+ *
+ * @return number of bytes
+ */
+ public long bytesRead() {
+ return in.bytesRead();
+ }
+
+ /**
+ * Close the ReadChannel.
+ *
+ * @throws IOException on error
+ */
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java
new file mode 100644
index 000000000..e4728822d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import java.nio.ByteBuffer;
+
+import org.apache.arrow.flatbuf.Message;
+
+/**
+ * Class to hold resulting Message metadata and buffer containing the serialized Flatbuffer
+ * message when reading messages from a ReadChannel. This handles Message metadata only and
+ * does not include the message body data, which should be subsequently read into an ArrowBuf.
+ */
+public class MessageMetadataResult {
+
+ /**
+ * Construct a container to hold a deserialized Message metadata, and buffer
+ * with the serialized Message as read from a ReadChannel.
+ *
+ * @param messageLength the length of the serialized Flatbuffer message in bytes
+ * @param messageBuffer contains the serialized Flatbuffer Message metadata
+ * @param message the deserialized Flatbuffer Message metadata description
+ */
+ MessageMetadataResult(int messageLength, ByteBuffer messageBuffer, Message message) {
+ this.messageLength = messageLength;
+ this.messageBuffer = messageBuffer;
+ this.message = message;
+ }
+
+ /**
+ * Creates a new {@link MessageMetadataResult} by parsing it from the beginning of the buffer.
+ *
+ * @param messageLength The length of the serialized flatbuffer message in bytes (might not be equal to the buffer
+ * size).
+ */
+ public static MessageMetadataResult create(ByteBuffer buffer, int messageLength) {
+ return new MessageMetadataResult(messageLength, buffer, Message.getRootAsMessage(buffer));
+ }
+
+ /**
+ * Get the length of the message metadata in bytes, not including the body length.
+ *
+ * @return number of bytes in the message metadata buffer.
+ */
+ public int getMessageLength() {
+ return messageLength;
+ }
+
+ /**
+ * Get the buffer containing the raw message metadata bytes, not including the message body data.
+ *
+ * @return buffer containing the message metadata.
+ */
+ public ByteBuffer getMessageBuffer() {
+ return messageBuffer;
+ }
+
+ /**
+ * Returns the bytes remaining in the buffer after parsing the message from it.
+ */
+ public int bytesAfterMessage() {
+ return message.getByteBuffer().remaining();
+ }
+
+ public byte headerType() {
+ return message.headerType();
+ }
+
+ /**
+ * Check if the message is followed by a body. This will be true if the message has a body
+ * length > 0, which indicates that a message body needs to be read from the input source.
+ *
+ * @return true if message has a defined body
+ */
+ public boolean messageHasBody() {
+ return message.bodyLength() > 0;
+ }
+
+ /**
+ * Get the length of the message body.
+ *
+ * @return number of bytes of the message body
+ */
+ public long getMessageBodyLength() {
+ return message.bodyLength();
+ }
+
+ /**
+ * Get the realized flatbuf Message metadata description.
+ *
+ * @return Message metadata
+ */
+ public Message getMessage() {
+ return message;
+ }
+
+ private final int messageLength;
+ private final ByteBuffer messageBuffer;
+ private final Message message;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java
new file mode 100644
index 000000000..591fbf106
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.memory.ArrowBuf;
+
+/**
+ * Class to hold the Message metadata and body data when reading messages through a
+ * MessageChannelReader.
+ */
+public class MessageResult {
+
+ /**
+ * Construct with a valid Message metadata and optional ArrowBuf containing message body
+ * data, if any.
+ *
+ * @param message Deserialized Flatbuffer Message metadata description
+ * @param bodyBuffer Optional ArrowBuf containing message body data, null if message has no body
+ */
+ MessageResult(Message message, ArrowBuf bodyBuffer) {
+ this.message = message;
+ this.bodyBuffer = bodyBuffer;
+ }
+
+ /**
+ * Get the Message metadata.
+ *
+ * @return the Flatbuffer Message metadata
+ */
+ public Message getMessage() {
+ return message;
+ }
+
+ /**
+ * Get the message body data.
+ *
+ * @return an ArrowBuf containing the message body data or null if the message has no body
+ */
+ public ArrowBuf getBodyBuffer() {
+ return bodyBuffer;
+ }
+
+ private final Message message;
+ private final ArrowBuf bodyBuffer;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
new file mode 100644
index 000000000..6597e0302
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
@@ -0,0 +1,736 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.flatbuf.Buffer;
+import org.apache.arrow.flatbuf.DictionaryBatch;
+import org.apache.arrow.flatbuf.FieldNode;
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.flatbuf.MessageHeader;
+import org.apache.arrow.flatbuf.MetadataVersion;
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ReadChannel;
+import org.apache.arrow.vector.ipc.WriteChannel;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Utility class for serializing Messages. Messages are all serialized a similar way.
+ * 1. 4 byte little endian message header prefix
+ * 2. FB serialized Message: This includes it the body length, which is the serialized
+ * body and the type of the message.
+ * 3. Serialized message.
+ *
+ * <p>For schema messages, the serialization is simply the FB serialized Schema.
+ *
+ * <p>For RecordBatch messages the serialization is:
+ * 1. 4 byte little endian batch metadata header
+ * 2. FB serialized RowBatch
+ * 3. Padding to align to 8 byte boundary.
+ * 4. serialized RowBatch buffers.
+ */
+public class MessageSerializer {
+
+ // This 0xFFFFFFFF value is the first 4 bytes of a valid IPC message
+ public static final int IPC_CONTINUATION_TOKEN = -1;
+
+ /**
+ * Convert an array of 4 bytes in little-endian to an native-endian i32 value.
+ *
+ * @param bytes byte array with minimum length of 4 in little-endian
+ * @return converted an native-endian 32-bit integer
+ */
+ public static int bytesToInt(byte[] bytes) {
+ return ((bytes[3] & 255) << 24) +
+ ((bytes[2] & 255) << 16) +
+ ((bytes[1] & 255) << 8) +
+ ((bytes[0] & 255));
+ }
+
+ /**
+ * Convert an integer to a little endian 4 byte array.
+ *
+ * @param value integer value input
+ * @param bytes existing byte array with minimum length of 4 to contain the conversion output
+ */
+ public static void intToBytes(int value, byte[] bytes) {
+ bytes[3] = (byte) (value >>> 24);
+ bytes[2] = (byte) (value >>> 16);
+ bytes[1] = (byte) (value >>> 8);
+ bytes[0] = (byte) (value);
+ }
+
+ /**
+ * Convert a long to a little-endian 8 byte array.
+ *
+ * @param value long value input
+ * @param bytes existing byte array with minimum length of 8 to contain the conversion output
+ */
+ public static void longToBytes(long value, byte[] bytes) {
+ bytes[7] = (byte) (value >>> 56);
+ bytes[6] = (byte) (value >>> 48);
+ bytes[5] = (byte) (value >>> 40);
+ bytes[4] = (byte) (value >>> 32);
+ bytes[3] = (byte) (value >>> 24);
+ bytes[2] = (byte) (value >>> 16);
+ bytes[1] = (byte) (value >>> 8);
+ bytes[0] = (byte) (value);
+ }
+
+ public static int writeMessageBuffer(WriteChannel out, int messageLength, ByteBuffer messageBuffer)
+ throws IOException {
+ return writeMessageBuffer(out, messageLength, messageBuffer, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Write the serialized Message metadata, prefixed by the length, to the output Channel. This
+ * ensures that it aligns to an 8 byte boundary and will adjust the message length to include
+ * any padding used for alignment.
+ *
+ * @param out Output Channel
+ * @param messageLength Number of bytes in the message buffer, written as little Endian prefix
+ * @param messageBuffer Message metadata buffer to be written, this does not include any
+ * message body data which should be subsequently written to the Channel
+ * @param option IPC write options
+ * @return Number of bytes written
+ * @throws IOException on error
+ */
+ public static int writeMessageBuffer(WriteChannel out, int messageLength, ByteBuffer messageBuffer, IpcOption option)
+ throws IOException {
+
+ // if write the pre-0.15.0 encapsulated IPC message format consisting of a 4-byte prefix instead of 8 byte
+ int prefixSize = option.write_legacy_ipc_format ? 4 : 8;
+
+ // ensure that message aligns to 8 byte padding - prefix_size bytes, then message body
+ if ((messageLength + prefixSize ) % 8 != 0) {
+ messageLength += 8 - (messageLength + prefixSize) % 8;
+ }
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(IPC_CONTINUATION_TOKEN);
+ }
+ out.writeIntLittleEndian(messageLength);
+ out.write(messageBuffer);
+ out.align();
+
+ // any bytes written are already captured by our size modification above
+ return messageLength + prefixSize;
+ }
+
+ /**
+ * Serialize a schema object.
+ */
+ public static long serialize(WriteChannel out, Schema schema) throws IOException {
+ return serialize(out, schema, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Serialize a schema object.
+ *
+ * @param out where to write the schema
+ * @param schema the object to serialize to out
+ * @return the number of bytes written
+ * @throws IOException if something went wrong
+ */
+ public static long serialize(WriteChannel out, Schema schema, IpcOption option) throws IOException {
+ long start = out.getCurrentPosition();
+ Preconditions.checkArgument(start % 8 == 0, "out is not aligned");
+
+ ByteBuffer serializedMessage = serializeMetadata(schema, option);
+
+ int messageLength = serializedMessage.remaining();
+
+ int bytesWritten = writeMessageBuffer(out, messageLength, serializedMessage, option);
+ Preconditions.checkArgument(bytesWritten % 8 == 0, "out is not aligned");
+ return bytesWritten;
+ }
+
+ /**
+ * Returns the serialized flatbuffer bytes of the schema wrapped in a message table.
+ */
+ @Deprecated
+ public static ByteBuffer serializeMetadata(Schema schema) {
+ return serializeMetadata(schema, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Returns the serialized flatbuffer bytes of the schema wrapped in a message table.
+ */
+ public static ByteBuffer serializeMetadata(Schema schema, IpcOption writeOption) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int schemaOffset = schema.getSchema(builder);
+ return MessageSerializer.serializeMessage(builder, org.apache.arrow.flatbuf.MessageHeader.Schema, schemaOffset, 0,
+ writeOption);
+ }
+
+ /**
+ * Deserializes an Arrow Schema object from a schema message. Format is from serialize().
+ *
+ * @param schemaMessage a Message of type MessageHeader.Schema
+ * @return the deserialized Arrow Schema
+ */
+ public static Schema deserializeSchema(Message schemaMessage) {
+ Preconditions.checkArgument(schemaMessage.headerType() == MessageHeader.Schema,
+ "Expected schema but result was: %s", schemaMessage.headerType());
+ return Schema.convertSchema((org.apache.arrow.flatbuf.Schema)
+ schemaMessage.header(new org.apache.arrow.flatbuf.Schema()));
+ }
+
+ /**
+ * Deserializes an Arrow Schema read from the input channel. Format is from serialize().
+ *
+ * @param in the channel to deserialize from
+ * @return the deserialized Arrow Schema
+ * @throws IOException if something went wrong
+ */
+ public static Schema deserializeSchema(ReadChannel in) throws IOException {
+ MessageMetadataResult result = readMessage(in);
+ if (result == null) {
+ throw new IOException("Unexpected end of input when reading Schema");
+ }
+ if (result.getMessage().headerType() != MessageHeader.Schema) {
+ throw new IOException("Expected schema but header was " + result.getMessage().headerType());
+ }
+ return deserializeSchema(result);
+ }
+
+ /**
+ * Deserializes an Arrow Schema object from a {@link MessageMetadataResult}. Format is from serialize().
+ *
+ * @param message a Message of type MessageHeader.Schema
+ * @return the deserialized Arrow Schema
+ */
+ public static Schema deserializeSchema(MessageMetadataResult message) {
+ return deserializeSchema(message.getMessage());
+ }
+
+ /**
+ * Serializes an ArrowRecordBatch. Returns the offset and length of the written batch.
+ */
+ public static ArrowBlock serialize(WriteChannel out, ArrowRecordBatch batch) throws IOException {
+ return serialize(out, batch, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Serializes an ArrowRecordBatch. Returns the offset and length of the written batch.
+ *
+ * @param out where to write the batch
+ * @param batch the object to serialize to out
+ * @return the serialized block metadata
+ * @throws IOException if something went wrong
+ */
+ public static ArrowBlock serialize(WriteChannel out, ArrowRecordBatch batch, IpcOption option) throws IOException {
+
+ long start = out.getCurrentPosition();
+ long bodyLength = batch.computeBodyLength();
+ Preconditions.checkArgument(bodyLength % 8 == 0, "batch is not aligned");
+
+ ByteBuffer serializedMessage = serializeMetadata(batch, option);
+
+ int metadataLength = serializedMessage.remaining();
+
+ int prefixSize = 4;
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(IPC_CONTINUATION_TOKEN);
+ prefixSize = 8;
+ }
+
+ // calculate alignment bytes so that metadata length points to the correct location after alignment
+ int padding = (int) ((start + metadataLength + prefixSize) % 8);
+ if (padding != 0) {
+ metadataLength += (8 - padding);
+ }
+
+ out.writeIntLittleEndian(metadataLength);
+ out.write(serializedMessage);
+
+ // Align the output to 8 byte boundary.
+ out.align();
+
+ long bufferLength = writeBatchBuffers(out, batch);
+ Preconditions.checkArgument(bufferLength % 8 == 0, "out is not aligned");
+
+ // Metadata size in the Block account for the size prefix
+ return new ArrowBlock(start, metadataLength + prefixSize, bufferLength);
+ }
+
+ /**
+ * Write the Arrow buffers of the record batch to the output channel.
+ *
+ * @param out the output channel to write the buffers to
+ * @param batch an ArrowRecordBatch containing buffers to be written
+ * @return the number of bytes written
+ * @throws IOException on error
+ */
+ public static long writeBatchBuffers(WriteChannel out, ArrowRecordBatch batch) throws IOException {
+ long bufferStart = out.getCurrentPosition();
+ List<ArrowBuf> buffers = batch.getBuffers();
+ List<ArrowBuffer> buffersLayout = batch.getBuffersLayout();
+
+ for (int i = 0; i < buffers.size(); i++) {
+ ArrowBuf buffer = buffers.get(i);
+ ArrowBuffer layout = buffersLayout.get(i);
+ long startPosition = bufferStart + layout.getOffset();
+ if (startPosition != out.getCurrentPosition()) {
+ out.writeZeros(startPosition - out.getCurrentPosition());
+ }
+ out.write(buffer);
+ if (out.getCurrentPosition() != startPosition + layout.getSize()) {
+ throw new IllegalStateException("wrong buffer size: " + out.getCurrentPosition() +
+ " != " + startPosition + layout.getSize());
+ }
+ }
+ out.align();
+ return out.getCurrentPosition() - bufferStart;
+ }
+
+ /**
+ * Returns the serialized form of {@link RecordBatch} wrapped in a {@link org.apache.arrow.flatbuf.Message}.
+ */
+ @Deprecated
+ public static ByteBuffer serializeMetadata(ArrowMessage message) {
+ return serializeMetadata(message, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Returns the serialized form of {@link RecordBatch} wrapped in a {@link org.apache.arrow.flatbuf.Message}.
+ */
+ public static ByteBuffer serializeMetadata(ArrowMessage message, IpcOption writeOption) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int batchOffset = message.writeTo(builder);
+ return serializeMessage(builder, message.getMessageType(), batchOffset,
+ message.computeBodyLength(), writeOption);
+ }
+
+ /**
+ * Deserializes an ArrowRecordBatch from a record batch message and data in an ArrowBuf.
+ *
+ * @param recordBatchMessage a Message of type MessageHeader.RecordBatch
+ * @param bodyBuffer Arrow buffer containing the RecordBatch data
+ * @return the deserialized ArrowRecordBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(Message recordBatchMessage, ArrowBuf bodyBuffer)
+ throws IOException {
+ RecordBatch recordBatchFB = (RecordBatch) recordBatchMessage.header(new RecordBatch());
+ return deserializeRecordBatch(recordBatchFB, bodyBuffer);
+ }
+
+ /**
+ * Deserializes an ArrowRecordBatch read from the input channel. This uses the given allocator
+ * to create an ArrowBuf for the batch body data.
+ *
+ * @param in Channel to read a RecordBatch message and data from
+ * @param allocator BufferAllocator to allocate an Arrow buffer to read message body data
+ * @return the deserialized ArrowRecordBatch
+ * @throws IOException on error
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(ReadChannel in, BufferAllocator allocator) throws IOException {
+ MessageMetadataResult result = readMessage(in);
+ if (result == null) {
+ throw new IOException("Unexpected end of input when reading a RecordBatch");
+ }
+ if (result.getMessage().headerType() != MessageHeader.RecordBatch) {
+ throw new IOException("Expected RecordBatch but header was " + result.getMessage().headerType());
+ }
+ long bodyLength = result.getMessageBodyLength();
+ ArrowBuf bodyBuffer = readMessageBody(in, bodyLength, allocator);
+ return deserializeRecordBatch(result.getMessage(), bodyBuffer);
+ }
+
+ /**
+ * Deserializes an ArrowRecordBatch knowing the size of the entire message up front. This
+ * minimizes the number of reads to the underlying stream.
+ *
+ * @param in the channel to deserialize from
+ * @param block the object to deserialize to
+ * @param alloc to allocate buffers
+ * @return the deserialized ArrowRecordBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(ReadChannel in, ArrowBlock block, BufferAllocator alloc)
+ throws IOException {
+ // Metadata length contains prefix_size bytes plus byte padding
+ long totalLen = block.getMetadataLength() + block.getBodyLength();
+
+ ArrowBuf buffer = alloc.buffer(totalLen);
+ if (in.readFully(buffer, totalLen) != totalLen) {
+ throw new IOException("Unexpected end of input trying to read batch.");
+ }
+
+ int prefixSize = buffer.getInt(0) == IPC_CONTINUATION_TOKEN ? 8 : 4;
+
+ ArrowBuf metadataBuffer = buffer.slice(prefixSize, block.getMetadataLength() - prefixSize);
+
+ Message messageFB =
+ Message.getRootAsMessage(metadataBuffer.nioBuffer().asReadOnlyBuffer());
+
+ RecordBatch recordBatchFB = (RecordBatch) messageFB.header(new RecordBatch());
+
+ // Now read the body
+ final ArrowBuf body = buffer.slice(block.getMetadataLength(),
+ totalLen - block.getMetadataLength());
+ return deserializeRecordBatch(recordBatchFB, body);
+ }
+
+ /**
+ * Deserializes an ArrowRecordBatch given the Flatbuffer metadata and in-memory body.
+ *
+ * @param recordBatchFB Deserialized FlatBuffer record batch
+ * @param body Read body of the record batch
+ * @return ArrowRecordBatch from metadata and in-memory body
+ * @throws IOException on error
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(RecordBatch recordBatchFB, ArrowBuf body) throws IOException {
+ // Now read the body
+ int nodesLength = recordBatchFB.nodesLength();
+ List<ArrowFieldNode> nodes = new ArrayList<>();
+ for (int i = 0; i < nodesLength; ++i) {
+ FieldNode node = recordBatchFB.nodes(i);
+ if ((int) node.length() != node.length() ||
+ (int) node.nullCount() != node.nullCount()) {
+ throw new IOException("Cannot currently deserialize record batches with " +
+ "node length larger than INT_MAX records.");
+ }
+ nodes.add(new ArrowFieldNode(node.length(), node.nullCount()));
+ }
+ List<ArrowBuf> buffers = new ArrayList<>();
+ for (int i = 0; i < recordBatchFB.buffersLength(); ++i) {
+ Buffer bufferFB = recordBatchFB.buffers(i);
+ ArrowBuf vectorBuffer = body.slice(bufferFB.offset(), bufferFB.length());
+ buffers.add(vectorBuffer);
+ }
+
+ ArrowBodyCompression bodyCompression = recordBatchFB.compression() == null ?
+ NoCompressionCodec.DEFAULT_BODY_COMPRESSION
+ : new ArrowBodyCompression(recordBatchFB.compression().codec(), recordBatchFB.compression().method());
+
+ if ((int) recordBatchFB.length() != recordBatchFB.length()) {
+ throw new IOException("Cannot currently deserialize record batches with more than INT_MAX records.");
+ }
+ ArrowRecordBatch arrowRecordBatch =
+ new ArrowRecordBatch(checkedCastToInt(recordBatchFB.length()), nodes, buffers, bodyCompression);
+ body.getReferenceManager().release();
+ return arrowRecordBatch;
+ }
+
+ /**
+ * Reads a record batch based on the metadata in serializedMessage and the underlying data buffer.
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(MessageMetadataResult serializedMessage,
+ ArrowBuf underlying) throws
+ IOException {
+ return deserializeRecordBatch(serializedMessage.getMessage(), underlying);
+ }
+
+ public static ArrowBlock serialize(WriteChannel out, ArrowDictionaryBatch batch) throws IOException {
+ return serialize(out, batch, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Serializes a dictionary ArrowRecordBatch. Returns the offset and length of the written batch.
+ *
+ * @param out where to serialize
+ * @param batch the batch to serialize
+ * @param option options for IPC
+ * @return the metadata of the serialized block
+ * @throws IOException if something went wrong
+ */
+ public static ArrowBlock serialize(WriteChannel out, ArrowDictionaryBatch batch, IpcOption option)
+ throws IOException {
+ long start = out.getCurrentPosition();
+
+ long bodyLength = batch.computeBodyLength();
+ Preconditions.checkArgument(bodyLength % 8 == 0, "batch is not aligned");
+
+ ByteBuffer serializedMessage = serializeMetadata(batch, option);
+
+ int metadataLength = serializedMessage.remaining();
+
+ int prefixSize = 4;
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(IPC_CONTINUATION_TOKEN);
+ prefixSize = 8;
+ }
+
+ // calculate alignment bytes so that metadata length points to the correct location after alignment
+ int padding = (int) ((start + metadataLength + prefixSize) % 8);
+ if (padding != 0) {
+ metadataLength += (8 - padding);
+ }
+
+ out.writeIntLittleEndian(metadataLength);
+ out.write(serializedMessage);
+
+ // Align the output to 8 byte boundary.
+ out.align();
+
+ // write the embedded record batch
+ long bufferLength = writeBatchBuffers(out, batch.getDictionary());
+ Preconditions.checkArgument(bufferLength % 8 == 0, "out is not aligned");
+
+ // Metadata size in the Block account for the size prefix
+ return new ArrowBlock(start, metadataLength + prefixSize, bufferLength);
+ }
+
+ /**
+ * Deserializes an ArrowDictionaryBatch from a dictionary batch Message and data in an ArrowBuf.
+ *
+ * @param message a message of type MessageHeader.DictionaryBatch
+ * @param bodyBuffer Arrow buffer containing the DictionaryBatch data
+ * of type MessageHeader.DictionaryBatch
+ * @return the deserialized ArrowDictionaryBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowDictionaryBatch deserializeDictionaryBatch(Message message, ArrowBuf bodyBuffer)
+ throws IOException {
+ DictionaryBatch dictionaryBatchFB = (DictionaryBatch) message.header(new DictionaryBatch());
+ ArrowRecordBatch recordBatch = deserializeRecordBatch(dictionaryBatchFB.data(), bodyBuffer);
+ return new ArrowDictionaryBatch(dictionaryBatchFB.id(), recordBatch, dictionaryBatchFB.isDelta());
+ }
+
+ /**
+ * Deserializes an ArrowDictionaryBatch from a dictionary batch Message and data in an ArrowBuf.
+ *
+ * @param message a message of type MessageHeader.DictionaryBatch
+ * @param bodyBuffer Arrow buffer containing the DictionaryBatch data
+ * of type MessageHeader.DictionaryBatch
+ * @return the deserialized ArrowDictionaryBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowDictionaryBatch deserializeDictionaryBatch(MessageMetadataResult message, ArrowBuf bodyBuffer)
+ throws IOException {
+ return deserializeDictionaryBatch(message.getMessage(), bodyBuffer);
+ }
+
+ /**
+ * Deserializes an ArrowDictionaryBatch read from the input channel. This uses the given allocator
+ * to create an ArrowBuf for the batch body data.
+ *
+ * @param in Channel to read a DictionaryBatch message and data from
+ * @param allocator BufferAllocator to allocate an Arrow buffer to read message body data
+ * @return the deserialized ArrowDictionaryBatch
+ * @throws IOException on error
+ */
+ public static ArrowDictionaryBatch deserializeDictionaryBatch(ReadChannel in, BufferAllocator allocator)
+ throws IOException {
+ MessageMetadataResult result = readMessage(in);
+ if (result == null) {
+ throw new IOException("Unexpected end of input when reading a DictionaryBatch");
+ }
+ if (result.getMessage().headerType() != MessageHeader.DictionaryBatch) {
+ throw new IOException("Expected DictionaryBatch but header was " + result.getMessage().headerType());
+ }
+ long bodyLength = result.getMessageBodyLength();
+ ArrowBuf bodyBuffer = readMessageBody(in, bodyLength, allocator);
+ return deserializeDictionaryBatch(result.getMessage(), bodyBuffer);
+ }
+
+ /**
+ * Deserializes a DictionaryBatch knowing the size of the entire message up front. This
+ * minimizes the number of reads to the underlying stream.
+ *
+ * @param in where to read from
+ * @param block block metadata for deserializing
+ * @param alloc to allocate new buffers
+ * @return the deserialized ArrowDictionaryBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowDictionaryBatch deserializeDictionaryBatch(
+ ReadChannel in,
+ ArrowBlock block,
+ BufferAllocator alloc) throws IOException {
+ // Metadata length contains integer prefix plus byte padding
+ long totalLen = block.getMetadataLength() + block.getBodyLength();
+
+ ArrowBuf buffer = alloc.buffer(totalLen);
+ if (in.readFully(buffer, totalLen) != totalLen) {
+ throw new IOException("Unexpected end of input trying to read batch.");
+ }
+
+ int prefixSize = buffer.getInt(0) == IPC_CONTINUATION_TOKEN ? 8 : 4;
+
+ ArrowBuf metadataBuffer = buffer.slice(prefixSize, block.getMetadataLength() - prefixSize);
+
+ Message messageFB =
+ Message.getRootAsMessage(metadataBuffer.nioBuffer().asReadOnlyBuffer());
+
+ DictionaryBatch dictionaryBatchFB = (DictionaryBatch) messageFB.header(new DictionaryBatch());
+
+ // Now read the body
+ final ArrowBuf body = buffer.slice(block.getMetadataLength(),
+ totalLen - block.getMetadataLength());
+ ArrowRecordBatch recordBatch = deserializeRecordBatch(dictionaryBatchFB.data(), body);
+ return new ArrowDictionaryBatch(dictionaryBatchFB.id(), recordBatch, dictionaryBatchFB.isDelta());
+ }
+
+ /**
+ * Deserialize a message that is either an ArrowDictionaryBatch or ArrowRecordBatch.
+ *
+ * @param reader MessageChannelReader to read a sequence of messages from a ReadChannel
+ * @return The deserialized record batch
+ * @throws IOException if the message is not an ArrowDictionaryBatch or ArrowRecordBatch
+ */
+ public static ArrowMessage deserializeMessageBatch(MessageChannelReader reader) throws IOException {
+ MessageResult result = reader.readNext();
+ if (result == null) {
+ return null;
+ } else if (result.getMessage().bodyLength() > Integer.MAX_VALUE) {
+ throw new IOException("Cannot currently deserialize record batches over 2GB");
+ }
+
+ if (result.getMessage().version() != MetadataVersion.V4 &&
+ result.getMessage().version() != MetadataVersion.V5) {
+ throw new IOException("Received metadata with an incompatible version number: " + result.getMessage().version());
+ }
+
+ switch (result.getMessage().headerType()) {
+ case MessageHeader.RecordBatch:
+ return deserializeRecordBatch(result.getMessage(), result.getBodyBuffer());
+ case MessageHeader.DictionaryBatch:
+ return deserializeDictionaryBatch(result.getMessage(), result.getBodyBuffer());
+ default:
+ throw new IOException("Unexpected message header type " + result.getMessage().headerType());
+ }
+ }
+
+ /**
+ * Deserialize a message that is either an ArrowDictionaryBatch or ArrowRecordBatch.
+ *
+ * @param in ReadChannel to read messages from
+ * @param alloc Allocator for message data
+ * @return The deserialized record batch
+ * @throws IOException if the message is not an ArrowDictionaryBatch or ArrowRecordBatch
+ */
+ public static ArrowMessage deserializeMessageBatch(ReadChannel in, BufferAllocator alloc) throws IOException {
+ return deserializeMessageBatch(new MessageChannelReader(in, alloc));
+ }
+
+ @Deprecated
+ public static ByteBuffer serializeMessage(
+ FlatBufferBuilder builder,
+ byte headerType,
+ int headerOffset,
+ long bodyLength) {
+ return serializeMessage(builder, headerType, headerOffset, bodyLength, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Serializes a message header.
+ *
+ * @param builder to write the flatbuf to
+ * @param headerType headerType field
+ * @param headerOffset header offset field
+ * @param bodyLength body length field
+ * @param writeOption IPC write options
+ * @return the corresponding ByteBuffer
+ */
+ public static ByteBuffer serializeMessage(
+ FlatBufferBuilder builder,
+ byte headerType,
+ int headerOffset,
+ long bodyLength,
+ IpcOption writeOption) {
+ Message.startMessage(builder);
+ Message.addHeaderType(builder, headerType);
+ Message.addHeader(builder, headerOffset);
+ Message.addVersion(builder, writeOption.metadataVersion.toFlatbufID());
+ Message.addBodyLength(builder, bodyLength);
+ builder.finish(Message.endMessage(builder));
+ return builder.dataBuffer();
+ }
+
+ /**
+ * Read a Message from the input channel and return a MessageMetadataResult that contains the
+ * Message metadata, buffer containing the serialized Message metadata as read, and length of the
+ * Message in bytes. Returns null if the end-of-stream has been reached.
+ *
+ * @param in ReadChannel to read messages from
+ * @return MessageMetadataResult with deserialized Message metadata and message information if
+ * a valid Message was read, or null if end-of-stream
+ * @throws IOException on error
+ */
+ public static MessageMetadataResult readMessage(ReadChannel in) throws IOException {
+
+ // Read the message size. There is an i32 little endian prefix.
+ ByteBuffer buffer = ByteBuffer.allocate(4);
+ if (in.readFully(buffer) == 4) {
+
+ int messageLength = MessageSerializer.bytesToInt(buffer.array());
+ if (messageLength == IPC_CONTINUATION_TOKEN) {
+ buffer.clear();
+ // ARROW-6313, if the first 4 bytes are continuation message, read the next 4 for the length
+ if (in.readFully(buffer) == 4) {
+ messageLength = MessageSerializer.bytesToInt(buffer.array());
+ }
+ }
+
+ // Length of 0 indicates end of stream
+ if (messageLength != 0) {
+
+ // Read the message into the buffer.
+ ByteBuffer messageBuffer = ByteBuffer.allocate(messageLength);
+ if (in.readFully(messageBuffer) != messageLength) {
+ throw new IOException(
+ "Unexpected end of stream trying to read message.");
+ }
+ messageBuffer.rewind();
+
+ // Load the message.
+ Message message = Message.getRootAsMessage(messageBuffer);
+
+ return new MessageMetadataResult(messageLength, messageBuffer, message);
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Read a Message body from the in channel into an ArrowBuf.
+ *
+ * @param in ReadChannel to read message body from
+ * @param bodyLength Length in bytes of the message body to read
+ * @param allocator Allocate the ArrowBuf to contain message body data
+ * @return an ArrowBuf containing the message body data
+ * @throws IOException on error
+ */
+ public static ArrowBuf readMessageBody(ReadChannel in, long bodyLength,
+ BufferAllocator allocator) throws IOException {
+ ArrowBuf bodyBuffer = allocator.buffer(bodyLength);
+ try {
+ if (in.readFully(bodyBuffer, bodyLength) != bodyLength) {
+ throw new IOException("Unexpected end of input trying to read batch.");
+ }
+ } catch (RuntimeException | IOException e) {
+ bodyBuffer.close();
+ throw e;
+ }
+ return bodyBuffer;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java
new file mode 100644
index 000000000..8fce12e83
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Resolutions that dates can be stored at.
+ */
+public enum DateUnit {
+ /** Days since epoch. */
+ DAY(org.apache.arrow.flatbuf.DateUnit.DAY),
+ /** Milliseconds since epoch. */
+ MILLISECOND(org.apache.arrow.flatbuf.DateUnit.MILLISECOND);
+
+ private static final DateUnit[] valuesByFlatbufId = new DateUnit[DateUnit.values().length];
+
+ static {
+ for (DateUnit v : DateUnit.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private final short flatbufID;
+
+ DateUnit(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static DateUnit fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java
new file mode 100644
index 000000000..c52fc1243
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+import org.apache.arrow.flatbuf.Precision;
+
+/**
+ * Precisions of primitive floating point numbers.
+ */
+public enum FloatingPointPrecision {
+ /** 16-bit (not a standard java type). */
+ HALF(Precision.HALF),
+ /** 32-bit (i.e. float in java). */
+ SINGLE(Precision.SINGLE),
+ /** 64-bit (i.e. double in java). */
+ DOUBLE(Precision.DOUBLE);
+
+ private static final FloatingPointPrecision[] valuesByFlatbufId =
+ new FloatingPointPrecision[FloatingPointPrecision.values().length];
+
+ static {
+ for (FloatingPointPrecision v : FloatingPointPrecision.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private short flatbufID;
+
+ private FloatingPointPrecision(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static FloatingPointPrecision fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java
new file mode 100644
index 000000000..1b17240d0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Resolutions for Interval Vectors.
+ */
+public enum IntervalUnit {
+ /** Values are stored as number of months (which can be converted into years and months via division). */
+ YEAR_MONTH(org.apache.arrow.flatbuf.IntervalUnit.YEAR_MONTH),
+ /** Values are stored as some number of days and some number of milliseconds within that day. */
+ DAY_TIME(org.apache.arrow.flatbuf.IntervalUnit.DAY_TIME),
+ /** Values are stored as number of months, days and nanoseconds. */
+ MONTH_DAY_NANO(org.apache.arrow.flatbuf.IntervalUnit.MONTH_DAY_NANO);
+
+ private static final IntervalUnit[] valuesByFlatbufId = new IntervalUnit[IntervalUnit.values().length];
+
+ static {
+ for (IntervalUnit v : IntervalUnit.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private short flatbufID;
+
+ private IntervalUnit(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static IntervalUnit fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java
new file mode 100644
index 000000000..a0e281960
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Metadata version for Arrow metadata.
+ */
+public enum MetadataVersion {
+ /// 0.1.0
+ V1(org.apache.arrow.flatbuf.MetadataVersion.V1),
+
+ /// 0.2.0
+ V2(org.apache.arrow.flatbuf.MetadataVersion.V2),
+
+ /// 0.3.0 to 0.7.1
+ V3(org.apache.arrow.flatbuf.MetadataVersion.V3),
+
+ /// 0.8.0 to 0.17.1
+ V4(org.apache.arrow.flatbuf.MetadataVersion.V4),
+
+ /// >= 1.0.0
+ V5(org.apache.arrow.flatbuf.MetadataVersion.V5),
+
+ ;
+
+ public static final MetadataVersion DEFAULT = V5;
+
+ private static final MetadataVersion[] valuesByFlatbufId =
+ new MetadataVersion[MetadataVersion.values().length];
+
+ static {
+ for (MetadataVersion v : MetadataVersion.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private final short flatbufID;
+
+ MetadataVersion(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short toFlatbufID() {
+ return flatbufID;
+ }
+
+ public static MetadataVersion fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java
new file mode 100644
index 000000000..dcaebba48
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Resolutions that times can be stored with.
+ */
+public enum TimeUnit {
+ SECOND(org.apache.arrow.flatbuf.TimeUnit.SECOND),
+ MILLISECOND(org.apache.arrow.flatbuf.TimeUnit.MILLISECOND),
+ MICROSECOND(org.apache.arrow.flatbuf.TimeUnit.MICROSECOND),
+ NANOSECOND(org.apache.arrow.flatbuf.TimeUnit.NANOSECOND);
+
+ private static final TimeUnit[] valuesByFlatbufId = new TimeUnit[TimeUnit.values().length];
+
+ static {
+ for (TimeUnit v : TimeUnit.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private final short flatbufID;
+
+ TimeUnit(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static TimeUnit fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
new file mode 100644
index 000000000..d4c827859
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
@@ -0,0 +1,1016 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
+import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
+import static org.apache.arrow.vector.types.UnionMode.Dense;
+import static org.apache.arrow.vector.types.UnionMode.Sparse;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.BigIntWriterImpl;
+import org.apache.arrow.vector.complex.impl.BitWriterImpl;
+import org.apache.arrow.vector.complex.impl.DateDayWriterImpl;
+import org.apache.arrow.vector.complex.impl.DateMilliWriterImpl;
+import org.apache.arrow.vector.complex.impl.Decimal256WriterImpl;
+import org.apache.arrow.vector.complex.impl.DecimalWriterImpl;
+import org.apache.arrow.vector.complex.impl.DenseUnionWriter;
+import org.apache.arrow.vector.complex.impl.DurationWriterImpl;
+import org.apache.arrow.vector.complex.impl.FixedSizeBinaryWriterImpl;
+import org.apache.arrow.vector.complex.impl.Float4WriterImpl;
+import org.apache.arrow.vector.complex.impl.Float8WriterImpl;
+import org.apache.arrow.vector.complex.impl.IntWriterImpl;
+import org.apache.arrow.vector.complex.impl.IntervalDayWriterImpl;
+import org.apache.arrow.vector.complex.impl.IntervalMonthDayNanoWriterImpl;
+import org.apache.arrow.vector.complex.impl.IntervalYearWriterImpl;
+import org.apache.arrow.vector.complex.impl.LargeVarBinaryWriterImpl;
+import org.apache.arrow.vector.complex.impl.LargeVarCharWriterImpl;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.SmallIntWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeMicroWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeMilliWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeNanoWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeSecWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampMicroTZWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampMicroWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampMilliTZWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampMilliWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampNanoTZWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampNanoWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampSecTZWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampSecWriterImpl;
+import org.apache.arrow.vector.complex.impl.TinyIntWriterImpl;
+import org.apache.arrow.vector.complex.impl.UInt1WriterImpl;
+import org.apache.arrow.vector.complex.impl.UInt2WriterImpl;
+import org.apache.arrow.vector.complex.impl.UInt4WriterImpl;
+import org.apache.arrow.vector.complex.impl.UInt8WriterImpl;
+import org.apache.arrow.vector.complex.impl.UnionLargeListWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.impl.UnionWriter;
+import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl;
+import org.apache.arrow.vector.complex.impl.VarCharWriterImpl;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
+import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
+import org.apache.arrow.vector.types.pojo.ArrowType.Date;
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+import org.apache.arrow.vector.types.pojo.ArrowType.Duration;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
+import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8;
+import org.apache.arrow.vector.types.pojo.ArrowType.List;
+import org.apache.arrow.vector.types.pojo.ArrowType.Map;
+import org.apache.arrow.vector.types.pojo.ArrowType.Null;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Time;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+
+/** An enumeration of all logical types supported by this library. */
+public class Types {
+
+ /**
+ * The actual enumeration of types.
+ */
+ public enum MinorType {
+ NULL(Null.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new NullVector(field.getName());
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return null;
+ }
+ },
+ STRUCT(Struct.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new StructVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new NullableStructWriter((StructVector) vector);
+ }
+ },
+ TINYINT(new Int(8, true)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TinyIntVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TinyIntWriterImpl((TinyIntVector) vector);
+ }
+ },
+ SMALLINT(new Int(16, true)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new SmallIntVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new SmallIntWriterImpl((SmallIntVector) vector);
+ }
+ },
+ INT(new Int(32, true)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new IntVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new IntWriterImpl((IntVector) vector);
+ }
+ },
+ BIGINT(new Int(64, true)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new BigIntVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new BigIntWriterImpl((BigIntVector) vector);
+ }
+ },
+ DATEDAY(new Date(DateUnit.DAY)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new DateDayVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DateDayWriterImpl((DateDayVector) vector);
+ }
+ },
+ DATEMILLI(new Date(DateUnit.MILLISECOND)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new DateMilliVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DateMilliWriterImpl((DateMilliVector) vector);
+ }
+ },
+ TIMESEC(new Time(TimeUnit.SECOND, 32)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeSecVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeSecWriterImpl((TimeSecVector) vector);
+ }
+ },
+ TIMEMILLI(new Time(TimeUnit.MILLISECOND, 32)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeMilliVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeMilliWriterImpl((TimeMilliVector) vector);
+ }
+ },
+ TIMEMICRO(new Time(TimeUnit.MICROSECOND, 64)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeMicroVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeMicroWriterImpl((TimeMicroVector) vector);
+ }
+ },
+ TIMENANO(new Time(TimeUnit.NANOSECOND, 64)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeNanoVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeNanoWriterImpl((TimeNanoVector) vector);
+ }
+ },
+ // time in second from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC.
+ TIMESTAMPSEC(new Timestamp(org.apache.arrow.vector.types.TimeUnit.SECOND, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampSecVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampSecWriterImpl((TimeStampSecVector) vector);
+ }
+ },
+ // time in millis from the Unix epoch, 00:00:00.000 on 1 January 1970, UTC.
+ TIMESTAMPMILLI(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampMilliVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampMilliWriterImpl((TimeStampMilliVector) vector);
+ }
+ },
+ // time in microsecond from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC.
+ TIMESTAMPMICRO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampMicroVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampMicroWriterImpl((TimeStampMicroVector) vector);
+ }
+ },
+ // time in nanosecond from the Unix epoch, 00:00:00.000000000 on 1 January 1970, UTC.
+ TIMESTAMPNANO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.NANOSECOND, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampNanoVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampNanoWriterImpl((TimeStampNanoVector) vector);
+ }
+ },
+ INTERVALDAY(new Interval(IntervalUnit.DAY_TIME)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new IntervalDayVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new IntervalDayWriterImpl((IntervalDayVector) vector);
+ }
+ },
+ INTERVALMONTHDAYNANO(new Interval(IntervalUnit.MONTH_DAY_NANO)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new IntervalMonthDayNanoVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new IntervalMonthDayNanoWriterImpl((IntervalMonthDayNanoVector) vector);
+ }
+ },
+ DURATION(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new DurationVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DurationWriterImpl((DurationVector) vector);
+ }
+ },
+
+
+ INTERVALYEAR(new Interval(IntervalUnit.YEAR_MONTH)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new IntervalYearVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new IntervalYearWriterImpl((IntervalYearVector) vector);
+ }
+ },
+ // 4 byte ieee 754
+ FLOAT4(new FloatingPoint(SINGLE)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new Float4Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new Float4WriterImpl((Float4Vector) vector);
+ }
+ },
+ // 8 byte ieee 754
+ FLOAT8(new FloatingPoint(DOUBLE)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new Float8Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new Float8WriterImpl((Float8Vector) vector);
+ }
+ },
+ BIT(Bool.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new BitVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new BitWriterImpl((BitVector) vector);
+ }
+ },
+ VARCHAR(Utf8.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new VarCharVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new VarCharWriterImpl((VarCharVector) vector);
+ }
+ },
+ LARGEVARCHAR(LargeUtf8.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new LargeVarCharVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new LargeVarCharWriterImpl((LargeVarCharVector) vector);
+ }
+ },
+ LARGEVARBINARY(LargeBinary.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new LargeVarBinaryVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new LargeVarBinaryWriterImpl((LargeVarBinaryVector) vector);
+ }
+ },
+ VARBINARY(Binary.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new VarBinaryVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new VarBinaryWriterImpl((VarBinaryVector) vector);
+ }
+ },
+ DECIMAL(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new DecimalVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DecimalWriterImpl((DecimalVector) vector);
+ }
+ },
+ DECIMAL256(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new Decimal256Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new Decimal256WriterImpl((Decimal256Vector) vector);
+ }
+ },
+ FIXEDSIZEBINARY(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new FixedSizeBinaryVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new FixedSizeBinaryWriterImpl((FixedSizeBinaryVector) vector);
+ }
+ },
+ UINT1(new Int(8, false)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new UInt1Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UInt1WriterImpl((UInt1Vector) vector);
+ }
+ },
+ UINT2(new Int(16, false)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new UInt2Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UInt2WriterImpl((UInt2Vector) vector);
+ }
+ },
+ UINT4(new Int(32, false)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new UInt4Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UInt4WriterImpl((UInt4Vector) vector);
+ }
+ },
+ UINT8(new Int(64, false)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new UInt8Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UInt8WriterImpl((UInt8Vector) vector);
+ }
+ },
+ LIST(List.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new ListVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UnionListWriter((ListVector) vector);
+ }
+ },
+ LARGELIST(ArrowType.LargeList.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) {
+ return new LargeListVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UnionLargeListWriter((LargeListVector) vector);
+ }
+ },
+ FIXED_SIZE_LIST(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new FixedSizeListVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ throw new UnsupportedOperationException("FieldWriter not implemented for FixedSizeList " +
+ "type");
+ }
+ },
+ UNION(new Union(Sparse, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ if (field.getFieldType().getDictionary() != null) {
+ throw new UnsupportedOperationException("Dictionary encoding not supported for complex " +
+ "types");
+ }
+ return new UnionVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UnionWriter((UnionVector) vector);
+ }
+ },
+ DENSEUNION(new Union(Dense, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ if (field.getFieldType().getDictionary() != null) {
+ throw new UnsupportedOperationException("Dictionary encoding not supported for complex " +
+ "types");
+ }
+ return new DenseUnionVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DenseUnionWriter((DenseUnionVector) vector);
+ }
+ },
+ MAP(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new MapVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UnionListWriter((MapVector) vector);
+ }
+ },
+ TIMESTAMPSECTZ(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampSecTZVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampSecTZWriterImpl((TimeStampSecTZVector) vector);
+ }
+ },
+ TIMESTAMPMILLITZ(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampMilliTZVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampMilliTZWriterImpl((TimeStampMilliTZVector) vector);
+ }
+ },
+ TIMESTAMPMICROTZ(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampMicroTZVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampMicroTZWriterImpl((TimeStampMicroTZVector) vector);
+ }
+ },
+ TIMESTAMPNANOTZ(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampNanoTZVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampNanoTZWriterImpl((TimeStampNanoTZVector) vector);
+ }
+ },
+ EXTENSIONTYPE(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return ((ExtensionType) field.getFieldType().getType()).getNewVector(field.getName(),
+ field.getFieldType(), allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return ((ExtensionTypeVector) vector).getUnderlyingVector().getMinorType().getNewFieldWriter(vector);
+ }
+ },
+ ;
+
+ private final ArrowType type;
+
+ MinorType(ArrowType type) {
+ this.type = type;
+ }
+
+ /**
+ * Returns the {@link ArrowType} equivalent of this type.
+ */
+ public final ArrowType getType() {
+ if (type == null) {
+ throw new UnsupportedOperationException("Cannot get simple type for type " + name());
+ }
+ return type;
+ }
+
+ /** Constructs a new vector for the given type. */
+ public final FieldVector getNewVector(
+ String name,
+ FieldType fieldType,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return getNewVector(new Field(name, fieldType, null), allocator, schemaChangeCallback);
+ }
+
+ /** Constructs a new vector for the given type. */
+ public abstract FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback);
+
+ public abstract FieldWriter getNewFieldWriter(ValueVector vector);
+ }
+
+ /**
+ * Maps the ArrowType to the java implementations MinorType.
+ */
+ public static MinorType getMinorTypeForArrowType(ArrowType arrowType) {
+ return arrowType.accept(new ArrowTypeVisitor<MinorType>() {
+ @Override
+ public MinorType visit(Null type) {
+ return MinorType.NULL;
+ }
+
+ @Override
+ public MinorType visit(Struct type) {
+ return MinorType.STRUCT;
+ }
+
+ @Override
+ public MinorType visit(List type) {
+ return MinorType.LIST;
+ }
+
+ @Override
+ public MinorType visit(FixedSizeList type) {
+ return MinorType.FIXED_SIZE_LIST;
+ }
+
+ @Override
+ public MinorType visit(Union type) {
+ switch (type.getMode()) {
+ case Sparse:
+ return MinorType.UNION;
+ case Dense:
+ return MinorType.DENSEUNION;
+ default:
+ throw new IllegalArgumentException("only Dense or Sparse unions supported: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Map type) {
+ return MinorType.MAP;
+ }
+
+ @Override
+ public MinorType visit(ArrowType.LargeList type) {
+ return MinorType.LARGELIST;
+ }
+
+ @Override
+ public MinorType visit(Int type) {
+ switch (type.getBitWidth()) {
+ case 8:
+ return type.getIsSigned() ? MinorType.TINYINT : MinorType.UINT1;
+ case 16:
+ return type.getIsSigned() ? MinorType.SMALLINT : MinorType.UINT2;
+ case 32:
+ return type.getIsSigned() ? MinorType.INT : MinorType.UINT4;
+ case 64:
+ return type.getIsSigned() ? MinorType.BIGINT : MinorType.UINT8;
+ default:
+ throw new IllegalArgumentException("only 8, 16, 32, 64 supported: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(FloatingPoint type) {
+ switch (type.getPrecision()) {
+ case HALF:
+ throw new UnsupportedOperationException("NYI: " + type);
+ case SINGLE:
+ return MinorType.FLOAT4;
+ case DOUBLE:
+ return MinorType.FLOAT8;
+ default:
+ throw new IllegalArgumentException("unknown precision: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Utf8 type) {
+ return MinorType.VARCHAR;
+ }
+
+ @Override
+ public Types.MinorType visit(LargeUtf8 type) {
+ return MinorType.LARGEVARCHAR;
+ }
+
+ @Override
+ public MinorType visit(Binary type) {
+ return MinorType.VARBINARY;
+ }
+
+ @Override
+ public MinorType visit(LargeBinary type) {
+ return MinorType.LARGEVARBINARY;
+ }
+
+ @Override
+ public MinorType visit(Bool type) {
+ return MinorType.BIT;
+ }
+
+ @Override
+ public MinorType visit(Decimal type) {
+ if (type.getBitWidth() == 256) {
+ return MinorType.DECIMAL256;
+ }
+ return MinorType.DECIMAL;
+ }
+
+ @Override
+ public MinorType visit(FixedSizeBinary type) {
+ return MinorType.FIXEDSIZEBINARY;
+ }
+
+ @Override
+ public MinorType visit(Date type) {
+ switch (type.getUnit()) {
+ case DAY:
+ return MinorType.DATEDAY;
+ case MILLISECOND:
+ return MinorType.DATEMILLI;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Time type) {
+ switch (type.getUnit()) {
+ case SECOND:
+ return MinorType.TIMESEC;
+ case MILLISECOND:
+ return MinorType.TIMEMILLI;
+ case MICROSECOND:
+ return MinorType.TIMEMICRO;
+ case NANOSECOND:
+ return MinorType.TIMENANO;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Timestamp type) {
+ String tz = type.getTimezone();
+ switch (type.getUnit()) {
+ case SECOND:
+ return tz == null ? MinorType.TIMESTAMPSEC : MinorType.TIMESTAMPSECTZ;
+ case MILLISECOND:
+ return tz == null ? MinorType.TIMESTAMPMILLI : MinorType.TIMESTAMPMILLITZ;
+ case MICROSECOND:
+ return tz == null ? MinorType.TIMESTAMPMICRO : MinorType.TIMESTAMPMICROTZ;
+ case NANOSECOND:
+ return tz == null ? MinorType.TIMESTAMPNANO : MinorType.TIMESTAMPNANOTZ;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Interval type) {
+ switch (type.getUnit()) {
+ case DAY_TIME:
+ return MinorType.INTERVALDAY;
+ case YEAR_MONTH:
+ return MinorType.INTERVALYEAR;
+ case MONTH_DAY_NANO:
+ return MinorType.INTERVALMONTHDAYNANO;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Duration type) {
+ return MinorType.DURATION;
+ }
+
+ @Override
+ public MinorType visit(ExtensionType type) {
+ return MinorType.EXTENSIONTYPE;
+ }
+ });
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java
new file mode 100644
index 000000000..19956ac6a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Different memory layouts for Union Vectors.
+ */
+public enum UnionMode {
+ /**
+ * Each child vector is the same length as the overall vector, and there is one 8-bit integer buffer to indicate
+ * the index of a child vector to use at any given position.
+ */
+ Sparse(org.apache.arrow.flatbuf.UnionMode.Sparse),
+ /**
+ * Each child vector is of variable width. The parent vector contains both a child index vector (like in
+ * {@link #Sparse}) and in addition a slot index buffer to determine the offset into the child vector indicated
+ * by the index vector.
+ */
+ Dense(org.apache.arrow.flatbuf.UnionMode.Dense);
+
+ private static final UnionMode[] valuesByFlatbufId = new UnionMode[UnionMode.values().length];
+
+ static {
+ for (UnionMode v : UnionMode.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private final short flatbufID;
+
+ private UnionMode(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static UnionMode fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java
new file mode 100644
index 000000000..8d41b92d8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import java.util.Objects;
+
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonGetter;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A POJO representation of Arrow Dictionary metadata.
+ */
+public class DictionaryEncoding {
+
+ private final long id;
+ private final boolean ordered;
+ private final Int indexType;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param id The ID of the dictionary to use for encoding.
+ * @param ordered Whether the keys in values in the dictionary are ordered.
+ * @param indexType (nullable). The integer type to use for indexing in the dictionary. Defaults to a signed
+ * 32 bit integer.
+ */
+ @JsonCreator
+ public DictionaryEncoding(
+ @JsonProperty("id") long id,
+ @JsonProperty("isOrdered") boolean ordered,
+ @JsonProperty("indexType") Int indexType) {
+ this.id = id;
+ this.ordered = ordered;
+ this.indexType = indexType == null ? new Int(32, true) : indexType;
+ }
+
+ public long getId() {
+ return id;
+ }
+
+ @JsonGetter("isOrdered")
+ public boolean isOrdered() {
+ return ordered;
+ }
+
+ public Int getIndexType() {
+ return indexType;
+ }
+
+ @Override
+ public String toString() {
+ return "DictionaryEncoding[id=" + id + ",ordered=" + ordered + ",indexType=" + indexType + "]";
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ } else if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ DictionaryEncoding that = (DictionaryEncoding) o;
+ return id == that.id && ordered == that.ordered && Objects.equals(indexType, that.indexType);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(id, ordered, indexType);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java
new file mode 100644
index 000000000..f347008b4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+
+/**
+ * A registry of recognized extension types.
+ */
+public final class ExtensionTypeRegistry {
+ private static final ConcurrentMap<String, ExtensionType> registry = new ConcurrentHashMap<>();
+
+ public static void register(ExtensionType type) {
+ registry.put(type.extensionName(), type);
+ }
+
+ public static void unregister(ExtensionType type) {
+ registry.remove(type.extensionName());
+ }
+
+ public static ExtensionType lookup(String name) {
+ return registry.get(name);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java
new file mode 100644
index 000000000..3a5ef1153
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME;
+import static org.apache.arrow.vector.types.pojo.ArrowType.getTypeForField;
+import static org.apache.arrow.vector.types.pojo.Schema.convertMetadata;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flatbuf.KeyValue;
+import org.apache.arrow.flatbuf.Type;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.TypeLayout;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * A POJO abstraction for the Flatbuffer description of Vector Type.
+ */
+public class Field {
+
+ private static final Logger logger = LoggerFactory.getLogger(Field.class);
+
+ public static Field nullablePrimitive(String name, ArrowType.PrimitiveType type) {
+ return nullable(name, type);
+ }
+
+ public static Field nullable(String name, ArrowType type) {
+ return new Field(name, FieldType.nullable(type), null);
+ }
+
+ private final String name;
+ private final FieldType fieldType;
+ private final List<Field> children;
+
+ private Field(
+ String name,
+ boolean nullable,
+ ArrowType type,
+ DictionaryEncoding dictionary,
+ List<Field> children,
+ Map<String, String> metadata) {
+ this(name, new FieldType(nullable, type, dictionary, metadata), children);
+ }
+
+ @JsonCreator
+ private Field(
+ @JsonProperty("name") String name,
+ @JsonProperty("nullable") boolean nullable,
+ @JsonProperty("type") ArrowType type,
+ @JsonProperty("dictionary") DictionaryEncoding dictionary,
+ @JsonProperty("children") List<Field> children,
+ @JsonProperty("metadata") List<Map<String, String>> metadata) {
+ this(name, new FieldType(nullable, type, dictionary, convertMetadata(metadata)), children);
+ }
+
+ private Field(String name, FieldType fieldType, List<Field> children, TypeLayout typeLayout) {
+ this.name = name;
+ this.fieldType = checkNotNull(fieldType);
+ this.children = children == null ? Collections.emptyList() : Collections2.toImmutableList(children);
+ }
+
+ public Field(String name, FieldType fieldType, List<Field> children) {
+ this(name, fieldType, children, fieldType == null ? null : TypeLayout.getTypeLayout(fieldType.getType()));
+ }
+
+ /**
+ * Construct a new vector of this type using the given allocator.
+ */
+ public FieldVector createVector(BufferAllocator allocator) {
+ FieldVector vector = fieldType.createNewSingleVector(this, allocator, null);
+ vector.initializeChildrenFromFields(children);
+ return vector;
+ }
+
+ /**
+ * Constructs a new instance from a flatbuffer representation of the field.
+ */
+ public static Field convertField(org.apache.arrow.flatbuf.Field field) {
+ Map<String, String> metadata = new HashMap<>();
+ for (int i = 0; i < field.customMetadataLength(); i++) {
+ KeyValue kv = field.customMetadata(i);
+ String key = kv.key();
+ String value = kv.value();
+ metadata.put(key == null ? "" : key, value == null ? "" : value);
+ }
+ metadata = Collections.unmodifiableMap(metadata);
+
+ String name = field.name();
+ boolean nullable = field.nullable();
+ ArrowType type = getTypeForField(field);
+
+ if (metadata.containsKey(ExtensionType.EXTENSION_METADATA_KEY_NAME)) {
+ final String extensionName = metadata.get(ExtensionType.EXTENSION_METADATA_KEY_NAME);
+ final String extensionMetadata = metadata.getOrDefault(ExtensionType.EXTENSION_METADATA_KEY_METADATA, "");
+ ExtensionType extensionType = ExtensionTypeRegistry.lookup(extensionName);
+ if (extensionType != null) {
+ type = extensionType.deserialize(type, extensionMetadata);
+ } else {
+ // Otherwise, we haven't registered the type
+ logger.info("Unrecognized extension type: {}", extensionName);
+ }
+ }
+
+ DictionaryEncoding dictionary = null;
+ org.apache.arrow.flatbuf.DictionaryEncoding dictionaryFB = field.dictionary();
+ if (dictionaryFB != null) {
+ ArrowType.Int indexType = null;
+ org.apache.arrow.flatbuf.Int indexTypeFB = dictionaryFB.indexType();
+ if (indexTypeFB != null) {
+ indexType = new ArrowType.Int(indexTypeFB.bitWidth(), indexTypeFB.isSigned());
+ }
+ dictionary = new DictionaryEncoding(dictionaryFB.id(), dictionaryFB.isOrdered(), indexType);
+ }
+ List<Field> children = new ArrayList<>();
+ for (int i = 0; i < field.childrenLength(); i++) {
+ Field childField = convertField(field.children(i));
+ childField = mutateOriginalNameIfNeeded(field, childField);
+ children.add(childField);
+ }
+ children = Collections.unmodifiableList(children);
+ return new Field(name, nullable, type, dictionary, children, metadata);
+ }
+
+ /**
+ * Helper method to ensure backward compatibility with schemas generated prior to ARROW-1347, ARROW-1663.
+ *
+ * @param field the field to check
+ * @param originalChildField original field which name might be mutated
+ * @return original or mutated field
+ */
+ private static Field mutateOriginalNameIfNeeded(org.apache.arrow.flatbuf.Field field, Field originalChildField) {
+ if ((field.typeType() == Type.List || field.typeType() == Type.FixedSizeList) &&
+ originalChildField.getName().equals("[DEFAULT]")) {
+ return
+ new Field(DATA_VECTOR_NAME,
+ originalChildField.isNullable(),
+ originalChildField.getType(),
+ originalChildField.getDictionary(),
+ originalChildField.getChildren(),
+ originalChildField.getMetadata());
+ }
+ return originalChildField;
+ }
+
+ /**
+ * Puts this object into <code>builder</code> and returns the length of the serialized flatbuffer.
+ */
+ public int getField(FlatBufferBuilder builder) {
+ int nameOffset = name == null ? -1 : builder.createString(name);
+ int typeOffset = getType().getType(builder);
+ int dictionaryOffset = -1;
+ DictionaryEncoding dictionary = getDictionary();
+ if (dictionary != null) {
+ int dictionaryType = dictionary.getIndexType().getType(builder);
+ org.apache.arrow.flatbuf.DictionaryEncoding.startDictionaryEncoding(builder);
+ org.apache.arrow.flatbuf.DictionaryEncoding.addId(builder, dictionary.getId());
+ org.apache.arrow.flatbuf.DictionaryEncoding.addIsOrdered(builder, dictionary.isOrdered());
+ org.apache.arrow.flatbuf.DictionaryEncoding.addIndexType(builder, dictionaryType);
+ dictionaryOffset = org.apache.arrow.flatbuf.DictionaryEncoding.endDictionaryEncoding(builder);
+ }
+ int[] childrenData = new int[children.size()];
+ for (int i = 0; i < children.size(); i++) {
+ childrenData[i] = children.get(i).getField(builder);
+ }
+ int childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, childrenData);
+ int[] metadataOffsets = new int[getMetadata().size()];
+ Iterator<Entry<String, String>> metadataIterator = getMetadata().entrySet().iterator();
+ for (int i = 0; i < metadataOffsets.length; i++) {
+ Entry<String, String> kv = metadataIterator.next();
+ int keyOffset = builder.createString(kv.getKey());
+ int valueOffset = builder.createString(kv.getValue());
+ KeyValue.startKeyValue(builder);
+ KeyValue.addKey(builder, keyOffset);
+ KeyValue.addValue(builder, valueOffset);
+ metadataOffsets[i] = KeyValue.endKeyValue(builder);
+ }
+ int metadataOffset = org.apache.arrow.flatbuf.Field.createCustomMetadataVector(builder, metadataOffsets);
+ org.apache.arrow.flatbuf.Field.startField(builder);
+ if (name != null) {
+ org.apache.arrow.flatbuf.Field.addName(builder, nameOffset);
+ }
+ org.apache.arrow.flatbuf.Field.addNullable(builder, isNullable());
+ org.apache.arrow.flatbuf.Field.addTypeType(builder, getType().getTypeID().getFlatbufID());
+ org.apache.arrow.flatbuf.Field.addType(builder, typeOffset);
+ org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset);
+ org.apache.arrow.flatbuf.Field.addCustomMetadata(builder, metadataOffset);
+ if (dictionary != null) {
+ org.apache.arrow.flatbuf.Field.addDictionary(builder, dictionaryOffset);
+ }
+ return org.apache.arrow.flatbuf.Field.endField(builder);
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public boolean isNullable() {
+ return fieldType.isNullable();
+ }
+
+ public ArrowType getType() {
+ return fieldType.getType();
+ }
+
+ @JsonIgnore
+ public FieldType getFieldType() {
+ return fieldType;
+ }
+
+ @JsonInclude(Include.NON_NULL)
+ public DictionaryEncoding getDictionary() {
+ return fieldType.getDictionary();
+ }
+
+ public List<Field> getChildren() {
+ return children;
+ }
+
+ @JsonIgnore
+ public Map<String, String> getMetadata() {
+ return fieldType.getMetadata();
+ }
+
+ @JsonProperty("metadata")
+ @JsonInclude(Include.NON_EMPTY)
+ List<Map<String, String>> getMetadataForJson() {
+ return convertMetadata(getMetadata());
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(name, isNullable(), getType(), getDictionary(), getMetadata(), children);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof Field)) {
+ return false;
+ }
+ Field that = (Field) obj;
+ return Objects.equals(this.name, that.name) &&
+ Objects.equals(this.isNullable(), that.isNullable()) &&
+ Objects.equals(this.getType(), that.getType()) &&
+ Objects.equals(this.getDictionary(), that.getDictionary()) &&
+ Objects.equals(this.getMetadata(), that.getMetadata()) &&
+ Objects.equals(this.children, that.children);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ if (name != null) {
+ sb.append(name).append(": ");
+ }
+ sb.append(getType());
+ if (getDictionary() != null) {
+ sb.append("[dictionary: ").append(getDictionary().getId()).append("]");
+ }
+ if (!children.isEmpty()) {
+ sb.append("<").append(children.stream()
+ .map(t -> t.toString())
+ .collect(Collectors.joining(", ")))
+ .append(">");
+ }
+ if (!isNullable()) {
+ sb.append(" not null");
+ }
+ return sb.toString();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java
new file mode 100644
index 000000000..bb3250ef1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.apache.arrow.vector.util.CallBack;
+
+/**
+ * POJO representation of an Arrow field type. It consists of a logical type, nullability and whether the field
+ * (column) is dictionary encoded.
+ */
+public class FieldType {
+
+ public static FieldType nullable(ArrowType type) {
+ return new FieldType(true, type, null, null);
+ }
+
+ private final boolean nullable;
+ private final ArrowType type;
+ private final DictionaryEncoding dictionary;
+ private final Map<String, String> metadata;
+
+ public FieldType(boolean nullable, ArrowType type, DictionaryEncoding dictionary) {
+ this(nullable, type, dictionary, null);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param nullable Whether the Vector is nullable
+ * @param type The logical arrow type of the field.
+ * @param dictionary The dictionary encoding of the field.
+ * @param metadata Custom metadata for the field.
+ */
+ public FieldType(boolean nullable, ArrowType type, DictionaryEncoding dictionary, Map<String, String> metadata) {
+ super();
+ this.nullable = nullable;
+ this.type = Preconditions.checkNotNull(type);
+ this.dictionary = dictionary;
+ if (type instanceof ExtensionType) {
+ // Save the extension type name/metadata
+ final Map<String, String> extensionMetadata = new HashMap<>();
+ extensionMetadata.put(ExtensionType.EXTENSION_METADATA_KEY_NAME, ((ExtensionType) type).extensionName());
+ extensionMetadata.put(ExtensionType.EXTENSION_METADATA_KEY_METADATA, ((ExtensionType) type).serialize());
+ if (metadata != null) {
+ extensionMetadata.putAll(metadata);
+ }
+ this.metadata = Collections.unmodifiableMap(extensionMetadata);
+ } else {
+ this.metadata = metadata == null ? java.util.Collections.emptyMap() : Collections2.immutableMapCopy(metadata);
+ }
+ }
+
+ public boolean isNullable() {
+ return nullable;
+ }
+
+ public ArrowType getType() {
+ return type;
+ }
+
+ public DictionaryEncoding getDictionary() {
+ return dictionary;
+ }
+
+ public Map<String, String> getMetadata() {
+ return metadata;
+ }
+
+ public FieldVector createNewSingleVector(String name, BufferAllocator allocator, CallBack schemaCallBack) {
+ MinorType minorType = Types.getMinorTypeForArrowType(type);
+ return minorType.getNewVector(name, this, allocator, schemaCallBack);
+ }
+
+ public FieldVector createNewSingleVector(Field field, BufferAllocator allocator, CallBack schemaCallBack) {
+ MinorType minorType = Types.getMinorTypeForArrowType(type);
+ return minorType.getNewVector(field, allocator, schemaCallBack);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(nullable, type, dictionary, metadata);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof FieldType)) {
+ return false;
+ }
+ FieldType that = (FieldType) obj;
+ return Objects.equals(this.isNullable(), that.isNullable()) &&
+ Objects.equals(this.getType(), that.getType()) &&
+ Objects.equals(this.getDictionary(), that.getDictionary()) &&
+ Objects.equals(this.getMetadata(), that.getMetadata());
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java
new file mode 100644
index 000000000..d377b395c
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+
+import static org.apache.arrow.vector.types.pojo.Field.convertField;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flatbuf.KeyValue;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.ipc.message.FBSerializables;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * An Arrow Schema.
+ */
+public class Schema {
+
+ /**
+ * Search for a field by name in given the list of fields.
+ *
+ * @param fields the list of the fields
+ * @param name the name of the field to return
+ * @return the corresponding field
+ * @throws IllegalArgumentException if the field was not found
+ */
+ public static Field findField(List<Field> fields, String name) {
+ for (Field field : fields) {
+ if (field.getName().equals(name)) {
+ return field;
+ }
+ }
+ throw new IllegalArgumentException(String.format("field %s not found in %s", name, fields));
+ }
+
+ static final String METADATA_KEY = "key";
+ static final String METADATA_VALUE = "value";
+
+ private static final ObjectMapper mapper = new ObjectMapper();
+ private static final ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter();
+ private static final ObjectReader reader = mapper.readerFor(Schema.class);
+
+ public static Schema fromJSON(String json) throws IOException {
+ return reader.readValue(Preconditions.checkNotNull(json));
+ }
+
+ public static Schema deserialize(ByteBuffer buffer) {
+ return convertSchema(org.apache.arrow.flatbuf.Schema.getRootAsSchema(buffer));
+ }
+
+ /** Converts a flatbuffer schema to its POJO representation. */
+ public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) {
+ List<Field> fields = new ArrayList<>();
+ for (int i = 0; i < schema.fieldsLength(); i++) {
+ fields.add(convertField(schema.fields(i)));
+ }
+ Map<String, String> metadata = new HashMap<>();
+ for (int i = 0; i < schema.customMetadataLength(); i++) {
+ KeyValue kv = schema.customMetadata(i);
+ String key = kv.key();
+ String value = kv.value();
+ metadata.put(key == null ? "" : key, value == null ? "" : value);
+ }
+ return new Schema(true, Collections.unmodifiableList(fields), Collections.unmodifiableMap(metadata));
+ }
+
+ private final List<Field> fields;
+ private final Map<String, String> metadata;
+
+ public Schema(Iterable<Field> fields) {
+ this(fields, (Map<String, String>) null);
+ }
+
+ /**
+ * Constructor with metadata.
+ */
+ public Schema(Iterable<Field> fields,
+ Map<String, String> metadata) {
+ this(true,
+ Collections2.toImmutableList(fields),
+ metadata == null ? Collections.emptyMap() : Collections2.immutableMapCopy(metadata));
+ }
+
+
+ /**
+ * Constructor used for JSON deserialization.
+ */
+ @JsonCreator
+ private Schema(@JsonProperty("fields") Iterable<Field> fields,
+ @JsonProperty("metadata") List<Map<String, String>> metadata) {
+ this(fields, convertMetadata(metadata));
+ }
+
+
+ /**
+ * Private constructor to bypass automatic collection copy.
+ * @param unsafe a ignored argument. Its only purpose is to prevent using the constructor
+ * by accident because of type collisions (List vs Iterable).
+ */
+ private Schema(boolean unsafe, List<Field> fields, Map<String, String> metadata) {
+ this.fields = fields;
+ this.metadata = metadata;
+ }
+
+ static Map<String, String> convertMetadata(List<Map<String, String>> metadata) {
+ return (metadata == null) ? null : metadata.stream()
+ .map(e -> new AbstractMap.SimpleImmutableEntry<>(e.get(METADATA_KEY), e.get(METADATA_VALUE)))
+ .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+ }
+
+ static List<Map<String, String>> convertMetadata(Map<String, String> metadata) {
+ return (metadata == null) ? null : metadata.entrySet()
+ .stream()
+ .map(Schema::convertEntryToKeyValueMap)
+ .collect(Collectors.toList());
+ }
+
+ private static Map<String, String> convertEntryToKeyValueMap(Map.Entry<String, String> entry) {
+ Map<String, String> map = new HashMap<>(2);
+ map.put(METADATA_KEY, entry.getKey());
+ map.put(METADATA_VALUE, entry.getValue());
+ return Collections.unmodifiableMap(map);
+ }
+
+ public List<Field> getFields() {
+ return fields;
+ }
+
+ @JsonIgnore
+ public Map<String, String> getCustomMetadata() {
+ return metadata;
+ }
+
+ @JsonProperty("metadata")
+ @JsonInclude(Include.NON_EMPTY)
+ List<Map<String, String>> getCustomMetadataForJson() {
+ return convertMetadata(getCustomMetadata());
+ }
+
+ /**
+ * Search for a field by name in this Schema.
+ *
+ * @param name the name of the field to return
+ * @return the corresponding field
+ * @throws IllegalArgumentException if the field was not found
+ */
+ public Field findField(String name) {
+ return findField(getFields(), name);
+ }
+
+ /**
+ * Returns the JSON string representation of this schema.
+ */
+ public String toJson() {
+ try {
+ return writer.writeValueAsString(this);
+ } catch (JsonProcessingException e) {
+ // this should not happen
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Adds this schema to the builder returning the size of the builder after adding.
+ */
+ public int getSchema(FlatBufferBuilder builder) {
+ int[] fieldOffsets = new int[fields.size()];
+ for (int i = 0; i < fields.size(); i++) {
+ fieldOffsets[i] = fields.get(i).getField(builder);
+ }
+ int fieldsOffset = org.apache.arrow.flatbuf.Schema.createFieldsVector(builder, fieldOffsets);
+ int metadataOffset = FBSerializables.writeKeyValues(builder, metadata);
+ org.apache.arrow.flatbuf.Schema.startSchema(builder);
+ org.apache.arrow.flatbuf.Schema.addFields(builder, fieldsOffset);
+ org.apache.arrow.flatbuf.Schema.addCustomMetadata(builder, metadataOffset);
+ return org.apache.arrow.flatbuf.Schema.endSchema(builder);
+ }
+
+ /**
+ * Returns the serialized flatbuffer representation of this schema.
+ */
+ public byte[] toByteArray() {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int schemaOffset = this.getSchema(builder);
+ builder.finish(schemaOffset);
+ ByteBuffer bb = builder.dataBuffer();
+ byte[] bytes = new byte[bb.remaining()];
+ bb.get(bytes);
+ return bytes;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(fields, metadata);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof Schema)) {
+ return false;
+ }
+ return Objects.equals(this.fields, ((Schema) obj).fields) &&
+ Objects.equals(this.metadata, ((Schema) obj).metadata);
+ }
+
+ @Override
+ public String toString() {
+ String meta = metadata.isEmpty() ? "" : "(metadata: " + metadata.toString() + ")";
+ return "Schema<" + fields.stream().map(t -> t.toString()).collect(Collectors.joining(", ")) + ">" + meta;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java
new file mode 100644
index 000000000..b8ce9bde4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.SeekableByteChannel;
+
+/**
+ * A {@link SeekableByteChannel} backed by a byte array.
+ */
+public class ByteArrayReadableSeekableByteChannel implements SeekableByteChannel {
+ private byte[] byteArray;
+ private int position = 0;
+
+ /**
+ * Construct a new object using the given byteArray as a backing store.
+ */
+ public ByteArrayReadableSeekableByteChannel(byte[] byteArray) {
+ if (byteArray == null) {
+ throw new NullPointerException();
+ }
+ this.byteArray = byteArray;
+ }
+
+ @Override
+ public boolean isOpen() {
+ return byteArray != null;
+ }
+
+ @Override
+ public void close() throws IOException {
+ byteArray = null;
+ }
+
+ @Override
+ public int read(final ByteBuffer dst) throws IOException {
+ int remainingInBuf = byteArray.length - this.position;
+ int length = Math.min(dst.remaining(), remainingInBuf);
+ dst.put(this.byteArray, this.position, length);
+ this.position += length;
+ return length;
+ }
+
+ @Override
+ public long position() throws IOException {
+ return this.position;
+ }
+
+ @Override
+ public SeekableByteChannel position(final long newPosition) throws IOException {
+ this.position = (int) newPosition;
+ return this;
+ }
+
+ @Override
+ public long size() throws IOException {
+ return this.byteArray.length;
+ }
+
+ @Override
+ public int write(final ByteBuffer src) throws IOException {
+ throw new UnsupportedOperationException("Read only");
+ }
+
+ @Override
+ public SeekableByteChannel truncate(final long size) throws IOException {
+ throw new UnsupportedOperationException("Read only");
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java
new file mode 100644
index 000000000..ddeca59b0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+/**
+ * Generic callback interface to be notified of events on value vectors.
+ */
+public interface CallBack {
+ void doWork();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java
new file mode 100644
index 000000000..3af2c9837
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+/**
+ * Utilities for rounding data size.
+ */
+public final class DataSizeRoundingUtil {
+
+ /**
+ * The mask for rounding an integer to a multiple of 8.
+ * (i.e. clear the lowest 3 bits)
+ */
+ public static int ROUND_8_MASK_INT = 0xFFFFFFF8;
+
+ /**
+ * The mask for rounding a long integer to a multiple of 8.
+ * (i.e. clear the lowest 3 bits)
+ */
+ public static long ROUND_8_MASK_LONG = 0xFFFFFFFFFFFFFFF8L;
+
+ /**
+ * The number of bits to shift for dividing by 8.
+ */
+ public static int DIVIDE_BY_8_SHIFT_BITS = 3;
+
+ /**
+ * Round up the number to the nearest multiple of 8.
+ * @param input the number to round.
+ * @return the rounded number.
+ */
+ public static int roundUpTo8Multiple(int input) {
+ return (input + 7) & ROUND_8_MASK_INT;
+ }
+
+ /**
+ * Round up the number to the nearest multiple of 8.
+ * @param input the number to round.
+ * @return the rounded number
+ */
+ public static long roundUpTo8Multiple(long input) {
+ return (input + 7L) & ROUND_8_MASK_LONG;
+ }
+
+ /**
+ * Round down the number to the nearest multiple of 8.
+ * @param input the number to round.
+ * @return the rounded number.
+ */
+ public static int roundDownTo8Multiple(int input) {
+ return input & ROUND_8_MASK_INT;
+ }
+
+ /**
+ * Round down the number to the nearest multiple of 8.
+ * @param input the number to round.
+ * @return the rounded number
+ */
+ public static long roundDownTo8Multiple(long input) {
+ return input & ROUND_8_MASK_LONG;
+ }
+
+ /**
+ * A fast way to compute Math.ceil(input / 8.0).
+ * @param input the input number.
+ * @return the computed number.
+ */
+ public static int divideBy8Ceil(int input) {
+ return (input + 7) >>> DIVIDE_BY_8_SHIFT_BITS;
+ }
+
+ /**
+ * A fast way to compute Math.ceil(input / 8.0).
+ * @param input the input number.
+ * @return the computed number.
+ */
+ public static long divideBy8Ceil(long input) {
+ return (input + 7) >>> (long) DIVIDE_BY_8_SHIFT_BITS;
+ }
+
+ private DataSizeRoundingUtil() {
+
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java
new file mode 100644
index 000000000..9e8b6d26f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.temporal.ChronoUnit;
+import java.util.TimeZone;
+
+/** Utility class for Date, DateTime, TimeStamp, Interval data types. */
+public class DateUtility {
+ private DateUtility() {}
+
+ private static final String UTC = "UTC";
+
+ public static final DateTimeFormatter formatDate = DateTimeFormatter.ofPattern("yyyy-MM-dd");
+ public static final DateTimeFormatter formatTimeStampMilli = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS");
+ public static final DateTimeFormatter formatTimeStampTZ = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS ZZZ");
+ public static final DateTimeFormatter formatTime = DateTimeFormatter.ofPattern("HH:mm:ss.SSS");
+
+ public static DateTimeFormatter dateTimeTZFormat = null;
+ public static DateTimeFormatter timeFormat = null;
+
+ public static final int yearsToMonths = 12;
+ public static final int hoursToMillis = 60 * 60 * 1000;
+ public static final int minutesToMillis = 60 * 1000;
+ public static final int secondsToMillis = 1000;
+ public static final int monthToStandardDays = 30;
+ public static final long monthsToMillis = 2592000000L; // 30 * 24 * 60 * 60 * 1000
+ public static final int daysToStandardMillis = 24 * 60 * 60 * 1000;
+
+ /** Returns the date time formatter used to parse date strings. */
+ public static DateTimeFormatter getDateTimeFormatter() {
+
+ if (dateTimeTZFormat == null) {
+ DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
+ DateTimeFormatter optionalTime = DateTimeFormatter.ofPattern(" HH:mm:ss");
+ DateTimeFormatter optionalSec = DateTimeFormatter.ofPattern(".SSS");
+ DateTimeFormatter optionalZone = DateTimeFormatter.ofPattern(" ZZZ");
+
+ dateTimeTZFormat = new DateTimeFormatterBuilder().append(dateFormatter).appendOptional(optionalTime)
+ .appendOptional(optionalSec).appendOptional(optionalZone).toFormatter();
+ }
+
+ return dateTimeTZFormat;
+ }
+
+ /** Returns time formatter used to parse time strings. */
+ public static DateTimeFormatter getTimeFormatter() {
+ if (timeFormat == null) {
+ DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern("HH:mm:ss");
+ DateTimeFormatter optionalSec = DateTimeFormatter.ofPattern(".SSS");
+ timeFormat = new DateTimeFormatterBuilder().append(timeFormatter).appendOptional(optionalSec).toFormatter();
+ }
+ return timeFormat;
+ }
+
+ /**
+ * Convert milliseconds from epoch to a LocalDateTime with timeZone offset.
+ *
+ * @param epochMillis milliseconds from epoch
+ * @param timeZone current timeZone
+ * @return LocalDateTime object with timeZone offset
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochMilli(long epochMillis, String timeZone) {
+ final LocalDateTime localDateTime = LocalDateTime.ofInstant(
+ Instant.ofEpochMilli(epochMillis), TimeZone.getTimeZone(timeZone).toZoneId());
+ return localDateTime;
+ }
+
+ /**
+ * Convert milliseconds from epoch to a LocalDateTime with UTC offset.
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochMilli(long epochMillis) {
+ return getLocalDateTimeFromEpochMilli(epochMillis, UTC);
+ }
+
+ /**
+ * Convert microseconds from epoch to a LocalDateTime with timeZone offset.
+ *
+ * @param epochMicros microseconds from epoch
+ * @param timeZone current timeZone
+ * @return LocalDateTime object with timeZone offset
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochMicro(long epochMicros, String timeZone) {
+ final long millis = java.util.concurrent.TimeUnit.MICROSECONDS.toMillis(epochMicros);
+ final long addl_micros = epochMicros - (millis * 1000);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis, timeZone).plus(addl_micros, ChronoUnit.MICROS);
+ }
+
+ /**
+ * Convert microseconds from epoch to a LocalDateTime with UTC offset.
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochMicro(long epochMicros) {
+ return getLocalDateTimeFromEpochMicro(epochMicros, UTC);
+ }
+
+ /**
+ * Convert nanoseconds from epoch to a LocalDateTime with timeZone offset.
+ *
+ * @param epochNanos nanoseconds from epoch
+ * @param timeZone current timeZone
+ * @return LocalDateTime object with timeZone offset
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochNano(long epochNanos, String timeZone) {
+ final long millis = java.util.concurrent.TimeUnit.NANOSECONDS.toMillis(epochNanos);
+ final long addl_nanos = epochNanos - (millis * 1000 * 1000);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis, timeZone).plusNanos(addl_nanos);
+ }
+
+ /**
+ * Convert nanoseconds from epoch to a LocalDateTime with UTC offset.
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochNano(long epochNanos) {
+ return getLocalDateTimeFromEpochNano(epochNanos, UTC);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
new file mode 100644
index 000000000..f778bcb20
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.apache.arrow.memory.ArrowBuf;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Utility methods for configurable precision Decimal values (e.g. {@link BigDecimal}).
+ */
+public class DecimalUtility {
+ private DecimalUtility() {}
+
+ public static final byte [] zeroes = new byte[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ public static final byte [] minus_one = new byte[] {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
+ private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+
+ /**
+ * Read an ArrowType.Decimal at the given value index in the ArrowBuf and convert to a BigDecimal
+ * with the given scale.
+ */
+ public static BigDecimal getBigDecimalFromArrowBuf(ArrowBuf bytebuf, int index, int scale, int byteWidth) {
+ byte[] value = new byte[byteWidth];
+ byte temp;
+ final long startIndex = (long) index * byteWidth;
+
+ bytebuf.getBytes(startIndex, value, 0, byteWidth);
+ if (LITTLE_ENDIAN) {
+ // Decimal stored as native endian, need to swap bytes to make BigDecimal if native endian is LE
+ int stop = byteWidth / 2;
+ for (int i = 0, j; i < stop; i++) {
+ temp = value[i];
+ j = (byteWidth - 1) - i;
+ value[i] = value[j];
+ value[j] = temp;
+ }
+ }
+ BigInteger unscaledValue = new BigInteger(value);
+ return new BigDecimal(unscaledValue, scale);
+ }
+
+ /**
+ * Read an ArrowType.Decimal from the ByteBuffer and convert to a BigDecimal with the given
+ * scale.
+ */
+ public static BigDecimal getBigDecimalFromByteBuffer(ByteBuffer bytebuf, int scale, int byteWidth) {
+ byte[] value = new byte[byteWidth];
+ bytebuf.get(value);
+ BigInteger unscaledValue = new BigInteger(value);
+ return new BigDecimal(unscaledValue, scale);
+ }
+
+ /**
+ * Read an ArrowType.Decimal from the ArrowBuf at the given value index and return it as a byte
+ * array.
+ */
+ public static byte[] getByteArrayFromArrowBuf(ArrowBuf bytebuf, int index, int byteWidth) {
+ final byte[] value = new byte[byteWidth];
+ final long startIndex = (long) index * byteWidth;
+ bytebuf.getBytes(startIndex, value, 0, byteWidth);
+ return value;
+ }
+
+ /**
+ * Check that the BigDecimal scale equals the vectorScale and that the BigDecimal precision is
+ * less than or equal to the vectorPrecision. If not, then an UnsupportedOperationException is
+ * thrown, otherwise returns true.
+ */
+ public static boolean checkPrecisionAndScale(BigDecimal value, int vectorPrecision, int vectorScale) {
+ if (value.scale() != vectorScale) {
+ throw new UnsupportedOperationException("BigDecimal scale must equal that in the Arrow vector: " +
+ value.scale() + " != " + vectorScale);
+ }
+ if (value.precision() > vectorPrecision) {
+ throw new UnsupportedOperationException("BigDecimal precision can not be greater than that in the Arrow " +
+ "vector: " + value.precision() + " > " + vectorPrecision);
+ }
+ return true;
+ }
+
+ /**
+ * Check that the decimal scale equals the vectorScale and that the decimal precision is
+ * less than or equal to the vectorPrecision. If not, then an UnsupportedOperationException is
+ * thrown, otherwise returns true.
+ */
+ public static boolean checkPrecisionAndScale(int decimalPrecision, int decimalScale, int vectorPrecision,
+ int vectorScale) {
+ if (decimalScale != vectorScale) {
+ throw new UnsupportedOperationException("BigDecimal scale must equal that in the Arrow vector: " +
+ decimalScale + " != " + vectorScale);
+ }
+ if (decimalPrecision > vectorPrecision) {
+ throw new UnsupportedOperationException("BigDecimal precision can not be greater than that in the Arrow " +
+ "vector: " + decimalPrecision + " > " + vectorPrecision);
+ }
+ return true;
+ }
+
+ /**
+ * Write the given BigDecimal to the ArrowBuf at the given value index. Will throw an
+ * UnsupportedOperationException if the decimal size is greater than the Decimal vector byte
+ * width.
+ */
+ public static void writeBigDecimalToArrowBuf(BigDecimal value, ArrowBuf bytebuf, int index, int byteWidth) {
+ final byte[] bytes = value.unscaledValue().toByteArray();
+ writeByteArrayToArrowBufHelper(bytes, bytebuf, index, byteWidth);
+ }
+
+ /**
+ * Write the given long to the ArrowBuf at the given value index.
+ * This routine extends the original sign bit to a new upper area in 128-bit or 256-bit.
+ */
+ public static void writeLongToArrowBuf(long value, ArrowBuf bytebuf, int index, int byteWidth) {
+ if (byteWidth != 16 && byteWidth != 32) {
+ throw new UnsupportedOperationException("DecimalUtility.writeLongToArrowBuf() currently supports " +
+ "128-bit or 256-bit width data");
+ }
+ final long addressOfValue = bytebuf.memoryAddress() + (long) index * byteWidth;
+ final long padValue = Long.signum(value) == -1 ? -1L : 0L;
+ if (LITTLE_ENDIAN) {
+ PlatformDependent.putLong(addressOfValue, value);
+ for (int i = 1; i <= (byteWidth - 8) / 8; i++) {
+ PlatformDependent.putLong(addressOfValue + Long.BYTES * i, padValue);
+ }
+ } else {
+ for (int i = 0; i < (byteWidth - 8) / 8; i++) {
+ PlatformDependent.putLong(addressOfValue + Long.BYTES * i, padValue);
+ }
+ PlatformDependent.putLong(addressOfValue + Long.BYTES * (byteWidth - 8) / 8, value);
+ }
+ }
+
+ /**
+ * Write the given byte array to the ArrowBuf at the given value index. Will throw an
+ * UnsupportedOperationException if the decimal size is greater than the Decimal vector byte
+ * width.
+ */
+ public static void writeByteArrayToArrowBuf(byte[] bytes, ArrowBuf bytebuf, int index, int byteWidth) {
+ writeByteArrayToArrowBufHelper(bytes, bytebuf, index, byteWidth);
+ }
+
+ private static void writeByteArrayToArrowBufHelper(byte[] bytes, ArrowBuf bytebuf, int index, int byteWidth) {
+ final long startIndex = (long) index * byteWidth;
+ if (bytes.length > byteWidth) {
+ throw new UnsupportedOperationException("Decimal size greater than " + byteWidth + " bytes: " + bytes.length);
+ }
+
+ byte [] padBytes = bytes[0] < 0 ? minus_one : zeroes;
+ if (LITTLE_ENDIAN) {
+ // Decimal stored as native-endian, need to swap data bytes before writing to ArrowBuf if LE
+ byte[] bytesLE = new byte[bytes.length];
+ for (int i = 0; i < bytes.length; i++) {
+ bytesLE[i] = bytes[bytes.length - 1 - i];
+ }
+
+ // Write LE data
+ bytebuf.setBytes(startIndex, bytesLE, 0, bytes.length);
+ bytebuf.setBytes(startIndex + bytes.length, padBytes, 0, byteWidth - bytes.length);
+ } else {
+ // Write BE data
+ bytebuf.setBytes(startIndex + byteWidth - bytes.length, bytes, 0, bytes.length);
+ bytebuf.setBytes(startIndex, padBytes, 0, byteWidth - bytes.length);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java
new file mode 100644
index 000000000..9592f3975
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Utility methods for working with Dictionaries used in Dictionary encodings.
+ */
+public class DictionaryUtility {
+ private DictionaryUtility() {}
+
+ /**
+ * Convert field and child fields that have a dictionary encoding to message format, so fields
+ * have the dictionary type.
+ *
+ * <p>NOTE: in the message format, fields have the dictionary type
+ * in the memory format, they have the index type
+ */
+ public static Field toMessageFormat(Field field, DictionaryProvider provider, Set<Long> dictionaryIdsUsed) {
+ if (!needConvertToMessageFormat(field)) {
+ return field;
+ }
+ DictionaryEncoding encoding = field.getDictionary();
+ List<Field> children;
+
+
+ ArrowType type;
+ if (encoding == null) {
+ type = field.getType();
+ children = field.getChildren();
+ } else {
+ long id = encoding.getId();
+ Dictionary dictionary = provider.lookup(id);
+ if (dictionary == null) {
+ throw new IllegalArgumentException("Could not find dictionary with ID " + id);
+ }
+ type = dictionary.getVectorType();
+ children = dictionary.getVector().getField().getChildren();
+
+ dictionaryIdsUsed.add(id);
+ }
+
+ final List<Field> updatedChildren = new ArrayList<>(children.size());
+ for (Field child : children) {
+ updatedChildren.add(toMessageFormat(child, provider, dictionaryIdsUsed));
+ }
+
+ return new Field(field.getName(), new FieldType(field.isNullable(), type, encoding, field.getMetadata()),
+ updatedChildren);
+ }
+
+ /**
+ * Checks if it is required to convert the field to message format.
+ * @param field the field to check.
+ * @return true if a conversion is required, and false otherwise.
+ */
+ public static boolean needConvertToMessageFormat(Field field) {
+ DictionaryEncoding encoding = field.getDictionary();
+
+ if (encoding != null) {
+ // when encoding is not null, the type must be determined from the
+ // dictionary, so conversion must be performed.
+ return true;
+ }
+
+ List<Field> children = field.getChildren();
+ for (Field child : children) {
+ if (needConvertToMessageFormat(child)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Convert field and child fields that have a dictionary encoding to memory format, so fields
+ * have the index type.
+ */
+ public static Field toMemoryFormat(Field field, BufferAllocator allocator, Map<Long, Dictionary> dictionaries) {
+ DictionaryEncoding encoding = field.getDictionary();
+ List<Field> children = field.getChildren();
+
+ if (encoding == null && children.isEmpty()) {
+ return field;
+ }
+
+ List<Field> updatedChildren = new ArrayList<>(children.size());
+ for (Field child : children) {
+ updatedChildren.add(toMemoryFormat(child, allocator, dictionaries));
+ }
+
+ ArrowType type;
+ List<Field> fieldChildren = null;
+ if (encoding == null) {
+ type = field.getType();
+ fieldChildren = updatedChildren;
+ } else {
+ // re-type the field for in-memory format
+ type = encoding.getIndexType();
+ if (type == null) {
+ type = new ArrowType.Int(32, true);
+ }
+ // get existing or create dictionary vector
+ if (!dictionaries.containsKey(encoding.getId())) {
+ // create a new dictionary vector for the values
+ String dictName = "DICT" + encoding.getId();
+ Field dictionaryField = new Field(dictName,
+ new FieldType(field.isNullable(), field.getType(), null, null), updatedChildren);
+ FieldVector dictionaryVector = dictionaryField.createVector(allocator);
+ dictionaries.put(encoding.getId(), new Dictionary(dictionaryVector, encoding));
+ }
+ }
+
+ return new Field(field.getName(), new FieldType(field.isNullable(), type, encoding, field.getMetadata()),
+ fieldChildren);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java
new file mode 100644
index 000000000..89c100779
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Iterator;
+
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.ElementAddressableVector;
+
+/**
+ * Iterator for traversing elements of a {@link ElementAddressableVector}.
+ * @param <T> vector type.
+ */
+public class ElementAddressableVectorIterator<T extends ElementAddressableVector>
+ implements Iterator<ArrowBufPointer> {
+
+ private final T vector;
+
+ /**
+ * Index of the next element to access.
+ */
+ private int index = 0;
+
+ private final ArrowBufPointer reusablePointer;
+
+ /**
+ * Constructs an iterator for the {@link ElementAddressableVector}.
+ * @param vector the vector to iterate.
+ */
+ public ElementAddressableVectorIterator(T vector) {
+ this(vector, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Constructs an iterator for the {@link ElementAddressableVector}.
+ * @param vector the vector to iterate.
+ * @param hasher the hasher to calculate the hash code.
+ */
+ public ElementAddressableVectorIterator(T vector, ArrowBufHasher hasher) {
+ this.vector = vector;
+ reusablePointer = new ArrowBufPointer(hasher);
+ }
+
+ @Override
+ public boolean hasNext() {
+ return index < vector.getValueCount();
+ }
+
+ /**
+ * Retrieves the next pointer from the vector.
+ * @return the pointer pointing to the next element in the vector.
+ * Note that the returned pointer is only valid before the next call to this method.
+ */
+ @Override
+ public ArrowBufPointer next() {
+ vector.getDataPointer(index, reusablePointer);
+ index += 1;
+ return reusablePointer;
+ }
+
+ /**
+ * Retrieves the next pointer from the vector.
+ * @param outPointer the pointer to populate.
+ */
+ public void next(ArrowBufPointer outPointer) {
+ vector.getDataPointer(index, outPointer);
+ index += 1;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java
new file mode 100644
index 000000000..2ca71ec63
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.ArrayList;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Extension of {@link ArrayList} that {@link #toString()} method returns the serialized JSON
+ * version of its members (or throws an exception if they can't be converted to JSON).
+ *
+ * @param <E> Type of value held in the list.
+ */
+public class JsonStringArrayList<E> extends ArrayList<E> {
+
+ private static ObjectMapper mapper;
+
+ static {
+ mapper = new ObjectMapper();
+ }
+
+ public JsonStringArrayList() {
+ super();
+ }
+
+ public JsonStringArrayList(int size) {
+ super(size);
+ }
+
+ @Override
+ public final String toString() {
+ try {
+ return mapper.writeValueAsString(this);
+ } catch (JsonProcessingException e) {
+ throw new IllegalStateException("Cannot serialize array list to JSON string", e);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java
new file mode 100644
index 000000000..f41ae4ee2
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.LinkedHashMap;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Simple class that extends the regular java.util.HashMap but overrides the
+ * toString() method of the HashMap class to produce a JSON string instead
+ *
+ * @param <K> The type of the key for the map.
+ * @param <V> The type of the value for the map.
+ */
+public class JsonStringHashMap<K, V> extends LinkedHashMap<K, V> {
+
+ private static ObjectMapper mapper;
+
+ static {
+ mapper = new ObjectMapper();
+ }
+
+ @Override
+ public final String toString() {
+ try {
+ return mapper.writeValueAsString(this);
+ } catch (JsonProcessingException e) {
+ throw new IllegalStateException("Cannot serialize hash map to JSON string", e);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java
new file mode 100644
index 000000000..cf157031b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * An implementation of a map that supports constant time look-up by a generic key or an ordinal.
+ *
+ * <p>This class extends the functionality a regular {@link Map} with ordinal lookup support.
+ * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple.
+ * Upon update the same ordinal id is re-used while value is replaced.
+ * Upon deletion of an existing item, its corresponding ordinal is recycled and could be used by another item.
+ *
+ * <p>For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However,
+ * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are
+ * responsible for explicitly checking the ordinal corresponding to a key via
+ * {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to execute a lookup
+ * with an ordinal.
+ *
+ * @param <K> key type
+ * @param <V> value type
+ */
+public interface MapWithOrdinal<K, V> {
+ V getByOrdinal(int id);
+
+ int getOrdinal(K key);
+
+ int size();
+
+ boolean isEmpty();
+
+ V get(K key);
+
+ Collection<V> getAll(K key);
+
+ boolean put(K key, V value, boolean overwrite);
+
+ Collection<V> values();
+
+ boolean containsKey(K key);
+
+ boolean remove(K key, V value);
+
+ boolean removeAll(K key);
+
+ void clear();
+
+ Set<K> keys();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java
new file mode 100644
index 000000000..41ce1fc0d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import org.apache.arrow.util.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import io.netty.util.collection.IntObjectHashMap;
+import io.netty.util.collection.IntObjectMap;
+
+/**
+ * An implementation of map that supports constant time look-up by a generic key or an ordinal.
+ *
+ * <p>This class extends the functionality a regular {@link Map} with ordinal lookup support.
+ * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple.
+ * Upon update the same ordinal id is re-used while value is replaced.
+ * Upon deletion of an existing item, its corresponding ordinal is recycled and could be used by another item.
+ *
+ * <p>For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However,
+ * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are
+ * responsible for explicitly checking the ordinal corresponding to a key via
+ * {@link MapWithOrdinalImpl#getOrdinal(Object)} before attempting to execute a lookup
+ * with an ordinal.
+ *
+ * @param <K> key type
+ * @param <V> value type
+ */
+public class MapWithOrdinalImpl<K, V> implements MapWithOrdinal<K, V> {
+ private static final Logger logger = LoggerFactory.getLogger(MapWithOrdinalImpl.class);
+
+ private final Map<K, Map.Entry<Integer, V>> primary = new HashMap<>();
+ private final IntObjectHashMap<V> secondary = new IntObjectHashMap<>();
+
+ private final Map<K, V> delegate = new Map<K, V>() {
+ @Override
+ public boolean isEmpty() {
+ return size() == 0;
+ }
+
+ @Override
+ public int size() {
+ return primary.size();
+ }
+
+ @Override
+ public boolean containsKey(Object key) {
+ return primary.containsKey(key);
+ }
+
+ @Override
+ public boolean containsValue(Object value) {
+ return primary.containsValue(value);
+ }
+
+ @Override
+ public V get(Object key) {
+ Entry<Integer, V> pair = primary.get(key);
+ if (pair != null) {
+ return pair.getValue();
+ }
+ return null;
+ }
+
+ @Override
+ public V put(K key, V value) {
+ final Entry<Integer, V> oldPair = primary.get(key);
+ // if key exists try replacing otherwise, assign a new ordinal identifier
+ final int ordinal = oldPair == null ? primary.size() : oldPair.getKey();
+ primary.put(key, new AbstractMap.SimpleImmutableEntry<>(ordinal, value));
+ secondary.put(ordinal, value);
+ return oldPair == null ? null : oldPair.getValue();
+ }
+
+ public boolean put(K key, V value, boolean override) {
+ return put(key, value) != null;
+ }
+
+ @Override
+ public V remove(Object key) {
+ final Entry<Integer, V> oldPair = primary.remove(key);
+ if (oldPair != null) {
+ final int lastOrdinal = secondary.size();
+ final V last = secondary.get(lastOrdinal);
+ // normalize mappings so that all numbers until primary.size() is assigned
+ // swap the last element with the deleted one
+ secondary.put(oldPair.getKey(), last);
+ primary.put((K) key, new AbstractMap.SimpleImmutableEntry<>(oldPair.getKey(), last));
+ }
+ return oldPair == null ? null : oldPair.getValue();
+ }
+
+ @Override
+ public void putAll(Map<? extends K, ? extends V> m) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ primary.clear();
+ secondary.clear();
+ }
+
+ @Override
+ public Set<K> keySet() {
+ return primary.keySet();
+ }
+
+ @Override
+ public Collection<V> values() {
+ return StreamSupport.stream(secondary.entries().spliterator(), false)
+ .map((IntObjectMap.PrimitiveEntry<V> t) -> Preconditions.checkNotNull(t).value())
+ .collect(Collectors.toList());
+ }
+
+ @Override
+ public Set<Entry<K, V>> entrySet() {
+ return primary.entrySet().stream()
+ .map(entry -> new AbstractMap.SimpleImmutableEntry<>(entry.getKey(), entry.getValue().getValue()))
+ .collect(Collectors.toSet());
+ }
+ };
+
+ /**
+ * Returns the value corresponding to the given ordinal.
+ *
+ * @param id ordinal value for lookup
+ * @return an instance of V
+ */
+ public V getByOrdinal(int id) {
+ return secondary.get(id);
+ }
+
+ /**
+ * Returns the ordinal corresponding to the given key.
+ *
+ * @param key key for ordinal lookup
+ * @return ordinal value corresponding to key if it exists or -1
+ */
+ public int getOrdinal(K key) {
+ Map.Entry<Integer, V> pair = primary.get(key);
+ if (pair != null) {
+ return pair.getKey();
+ }
+ return -1;
+ }
+
+ @Override
+ public int size() {
+ return delegate.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return delegate.isEmpty();
+ }
+
+ @Override
+ public Collection<V> getAll(K key) {
+ if (delegate.containsKey(key)) {
+ List<V> list = new ArrayList<>(1);
+ list.add(get(key));
+ return list;
+ }
+ return null;
+ }
+
+ @Override
+ public V get(K key) {
+ return delegate.get(key);
+ }
+
+ /**
+ * Inserts the tuple (key, value) into the map extending the semantics of {@link Map#put} with automatic ordinal
+ * assignment. A new ordinal is assigned if key does not exists. Otherwise the same ordinal is re-used but the value
+ * is replaced.
+ *
+ * @see java.util.Map#put
+ */
+ @Override
+ public boolean put(K key, V value, boolean overwrite) {
+ return delegate.put(key, value) != null;
+ }
+
+ @Override
+ public Collection<V> values() {
+ return delegate.values();
+ }
+
+ @Override
+ public boolean remove(K key, V value) {
+ return false;
+ }
+
+ @Override
+ public boolean containsKey(Object key) {
+ return delegate.containsKey(key);
+ }
+
+ /**
+ * Removes the element corresponding to the key if exists extending the semantics of {@link java.util.Map#remove}
+ * with ordinal re-cycling. The ordinal corresponding to the given key may be re-assigned to another tuple. It is
+ * important that consumer checks the ordinal value via
+ * {@link MapWithOrdinalImpl#getOrdinal(Object)} before attempting to look-up by ordinal.
+ *
+ * @see java.util.Map#remove
+ */
+ @Override
+ public boolean removeAll(K key) {
+ return delegate.remove(key) != null;
+ }
+
+ @Override
+ public void clear() {
+ delegate.clear();
+ }
+
+ @Override
+ public Set<K> keys() {
+ return delegate.keySet();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java
new file mode 100644
index 000000000..5fbb45a7a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import io.netty.util.collection.IntObjectHashMap;
+
+/**
+ * An implementation of a multimap that supports constant time look-up by a generic key or an ordinal.
+ *
+ * <p>This class extends the functionality a regular {@link Map} with ordinal lookup support.
+ * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple.
+ * Upon update the same ordinal id is re-used while value is replaced.
+ * Upon deletion of an existing item, its corresponding ordinal is recycled and could be used by another item.
+ *
+ * <p>For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However,
+ * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are
+ * responsible for explicitly checking the ordinal corresponding to a key via
+ * {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to execute a lookup
+ * with an ordinal.
+ *
+ * @param <K> key type
+ * @param <V> value type
+ */
+public class MultiMapWithOrdinal<K, V> implements MapWithOrdinal<K, V> {
+
+ private final Map<K, Set<Integer>> keyToOrdinal = new LinkedHashMap<>();
+ private final IntObjectHashMap<V> ordinalToValue = new IntObjectHashMap<>();
+
+ /**
+ * Returns the value corresponding to the given ordinal.
+ *
+ * @param id ordinal value for lookup
+ * @return an instance of V
+ */
+ @Override
+ public V getByOrdinal(int id) {
+ return ordinalToValue.get(id);
+ }
+
+ /**
+ * Returns the ordinal corresponding to the given key.
+ *
+ * @param key key for ordinal lookup
+ * @return ordinal value corresponding to key if it exists or -1
+ */
+ @Override
+ public int getOrdinal(K key) {
+ Set<Integer> pair = getOrdinals(key);
+ if (!pair.isEmpty()) {
+ return pair.iterator().next();
+ }
+ return -1;
+ }
+
+ private Set<Integer> getOrdinals(K key) {
+ return keyToOrdinal.getOrDefault(key, new HashSet<>());
+ }
+
+ @Override
+ public int size() {
+ return ordinalToValue.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return ordinalToValue.isEmpty();
+ }
+
+ /**
+ * get set of values for key.
+ */
+ @Override
+ public V get(K key) {
+ Set<Integer> ordinals = keyToOrdinal.get(key);
+ if (ordinals == null) {
+ return null;
+ }
+ return ordinals.stream().map(ordinalToValue::get).collect(Collectors.toList()).get(0);
+ }
+
+ /**
+ * get set of values for key.
+ */
+ @Override
+ public Collection<V> getAll(K key) {
+ Set<Integer> ordinals = keyToOrdinal.get(key);
+ if (ordinals == null) {
+ return null;
+ }
+ return ordinals.stream().map(ordinalToValue::get).collect(Collectors.toList());
+ }
+
+ /**
+ * Inserts the tuple (key, value) into the multimap with automatic ordinal assignment.
+ *
+ * A new ordinal is assigned if key/value pair does not exists.
+ *
+ * If overwrite is true the existing key will be overwritten with value else value will be appended to the multimap.
+ */
+ @Override
+ public boolean put(K key, V value, boolean overwrite) {
+ if (overwrite) {
+ removeAll(key);
+ }
+ Set<Integer> ordinalSet = getOrdinals(key);
+ int nextOrdinal = ordinalToValue.size();
+ ordinalToValue.put(nextOrdinal, value);
+ boolean changed = ordinalSet.add(nextOrdinal);
+ keyToOrdinal.put(key, ordinalSet);
+ return changed;
+ }
+
+ @Override
+ public Collection<V> values() {
+ return ordinalToValue.values();
+ }
+
+ @Override
+ public boolean containsKey(K key) {
+ return keyToOrdinal.containsKey(key);
+ }
+
+ /**
+ * Removes the element corresponding to the key/value if exists with ordinal re-cycling.
+ *
+ * The ordinal corresponding to the given key may be re-assigned to another tuple. It is
+ * important that consumer checks the ordinal value via
+ * {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to look-up by ordinal.
+ *
+ * If the multimap is changed return true.
+ */
+ @Override
+ public synchronized boolean remove(K key, V value) {
+ Set<Integer> removalSet = getOrdinals(key);
+ if (removalSet.isEmpty()) {
+ return false;
+ }
+ Optional<V> removeValue = removalSet.stream().map(ordinalToValue::get).filter(value::equals).findFirst();
+ if (!removeValue.isPresent()) {
+ return false;
+ }
+ int removalOrdinal = removeKv(removalSet, key, value);
+ int lastOrdinal = ordinalToValue.size();
+ if (lastOrdinal != removalOrdinal) { //we didn't remove the last ordinal
+ swapOrdinal(lastOrdinal, removalOrdinal);
+ }
+ return true;
+ }
+
+ private void swapOrdinal(int lastOrdinal, int removalOrdinal) {
+ V swapOrdinalValue = ordinalToValue.remove(lastOrdinal);
+ ordinalToValue.put(removalOrdinal, swapOrdinalValue);
+ K swapOrdinalKey = keyToOrdinal.entrySet()
+ .stream()
+ .filter(kv -> kv.getValue().stream().anyMatch(o -> o == lastOrdinal))
+ .map(Map.Entry::getKey)
+ .findFirst()
+ .orElseThrow(() -> new IllegalStateException("MultimapWithOrdinal in bad state"));
+ ordinalToValue.put(removalOrdinal, swapOrdinalValue);
+ Set<Integer> swapSet = getOrdinals(swapOrdinalKey);
+ swapSet.remove(lastOrdinal);
+ swapSet.add(removalOrdinal);
+ keyToOrdinal.put(swapOrdinalKey, swapSet);
+ }
+
+ private int removeKv(Set<Integer> removalSet, K key, V value) {
+ Integer removalOrdinal = removalSet.stream()
+ .filter(i -> ordinalToValue.get(i).equals(value))
+ .findFirst()
+ .orElseThrow(() -> new IllegalStateException("MultimapWithOrdinal in bad state"));
+ ordinalToValue.remove(removalOrdinal);
+ removalSet.remove(removalOrdinal);
+ if (removalSet.isEmpty()) {
+ keyToOrdinal.remove(key);
+ } else {
+ keyToOrdinal.put(key, removalSet);
+ }
+ return removalOrdinal;
+ }
+
+ /**
+ * remove all entries of key.
+ */
+ @Override
+ public synchronized boolean removeAll(K key) {
+ Collection<V> values = this.getAll(key);
+ if (values == null) {
+ return false;
+ }
+ for (V v: values) {
+ this.remove(key, v);
+ }
+ return true;
+ }
+
+ @Override
+ public void clear() {
+ ordinalToValue.clear();
+ keyToOrdinal.clear();
+ }
+
+ @Override
+ public Set<K> keys() {
+ return keyToOrdinal.keySet();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java
new file mode 100644
index 000000000..a47d3ade0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+
+/**
+ * An exception that is used to signal that allocation request in bytes is greater than the maximum allowed by
+ * {@link org.apache.arrow.memory.BufferAllocator#buffer(int) allocator}.
+ *
+ * <p>Operators should handle this exception to split the batch and later resume the execution on the next
+ * iteration.</p>
+ */
+public class OversizedAllocationException extends RuntimeException {
+ public OversizedAllocationException() {
+ super();
+ }
+
+ public OversizedAllocationException(
+ String message,
+ Throwable cause,
+ boolean enableSuppression,
+ boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+
+ public OversizedAllocationException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public OversizedAllocationException(String message) {
+ super(message);
+ }
+
+ public OversizedAllocationException(Throwable cause) {
+ super(cause);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java
new file mode 100644
index 000000000..f2f838af9
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Collection;
+import java.util.Set;
+
+import org.apache.arrow.vector.complex.AbstractStructVector;
+
+/**
+ * Implementation of MapWithOrdinal that allows for promotion to multimap when duplicate fields exist.
+ * @param <K> key type
+ * @param <V> value type
+ */
+public class PromotableMultiMapWithOrdinal<K, V> implements MapWithOrdinal<K, V> {
+ private final MapWithOrdinalImpl<K, V> mapWithOrdinal = new MapWithOrdinalImpl<>();
+ private final MultiMapWithOrdinal<K, V> multiMapWithOrdinal = new MultiMapWithOrdinal<>();
+ private final boolean promotable;
+ private AbstractStructVector.ConflictPolicy conflictPolicy;
+ private MapWithOrdinal<K, V> delegate;
+
+ /**
+ * Create promotable map.
+ * @param promotable if promotion is allowed, otherwise delegate to MapWithOrdinal.
+ * @param conflictPolicy how to handle name conflicts.
+ */
+ public PromotableMultiMapWithOrdinal(boolean promotable, AbstractStructVector.ConflictPolicy conflictPolicy) {
+ this.promotable = promotable;
+ this.conflictPolicy = conflictPolicy;
+ delegate = mapWithOrdinal;
+ }
+
+ private void promote() {
+ if (delegate == multiMapWithOrdinal ||
+ !promotable ||
+ conflictPolicy.equals(AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE)) {
+ return;
+ }
+ for (K key : mapWithOrdinal.keys()) {
+ V value = mapWithOrdinal.get(key);
+ multiMapWithOrdinal.put(key, value, false);
+ }
+ mapWithOrdinal.clear();
+ delegate = multiMapWithOrdinal;
+ }
+
+ @Override
+ public V getByOrdinal(int id) {
+ return delegate.getByOrdinal(id);
+ }
+
+ @Override
+ public int getOrdinal(K key) {
+ return delegate.getOrdinal(key);
+ }
+
+ @Override
+ public int size() {
+ return delegate.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return delegate.isEmpty();
+ }
+
+ @Override
+ public V get(K key) {
+ return delegate.get(key);
+ }
+
+ @Override
+ public Collection<V> getAll(K key) {
+ return delegate.getAll(key);
+ }
+
+ @Override
+ public boolean put(K key, V value, boolean overwrite) {
+ if (delegate.containsKey(key)) {
+ promote();
+ }
+ return delegate.put(key, value, overwrite);
+ }
+
+ @Override
+ public Collection<V> values() {
+ return delegate.values();
+ }
+
+ @Override
+ public boolean containsKey(K key) {
+ return delegate.containsKey(key);
+ }
+
+ @Override
+ public boolean remove(K key, V value) {
+ return delegate.remove(key, value);
+ }
+
+ @Override
+ public boolean removeAll(K key) {
+ return delegate.removeAll(key);
+ }
+
+ @Override
+ public void clear() {
+ delegate.clear();
+ }
+
+ @Override
+ public Set<K> keys() {
+ return delegate.keys();
+ }
+
+ public void setConflictPolicy(AbstractStructVector.ConflictPolicy conflictPolicy) {
+ this.conflictPolicy = conflictPolicy;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java
new file mode 100644
index 000000000..c29eb6ad3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+
+/**
+ * Thrown when child vectors (e.g. in lists) don't match the expected type.
+ */
+public class SchemaChangeRuntimeException extends RuntimeException {
+ public SchemaChangeRuntimeException() {
+ super();
+ }
+
+ public SchemaChangeRuntimeException(
+ String message,
+ Throwable cause,
+ boolean enableSuppression,
+ boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+
+ public SchemaChangeRuntimeException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public SchemaChangeRuntimeException(String message) {
+ super(message);
+ }
+
+ public SchemaChangeRuntimeException(Throwable cause) {
+ super(cause);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java
new file mode 100644
index 000000000..f8167604c
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.ReadChannel;
+import org.apache.arrow.vector.ipc.WriteChannel;
+import org.apache.arrow.vector.ipc.message.MessageChannelReader;
+import org.apache.arrow.vector.ipc.message.MessageResult;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Schema utility class including serialization and deserialization.
+ */
+public class SchemaUtility {
+ private SchemaUtility() {}
+
+ /**
+ * Deserialize Arrow schema from byte array.
+ */
+ public static Schema deserialize(byte[] bytes, BufferAllocator allocator) throws IOException {
+ try (MessageChannelReader schemaReader =
+ new MessageChannelReader(
+ new ReadChannel(
+ new ByteArrayReadableSeekableByteChannel(bytes)), allocator)) {
+
+ MessageResult result = schemaReader.readNext();
+ if (result == null) {
+ throw new IOException("Unexpected end of input. Missing schema.");
+ }
+ return MessageSerializer.deserializeSchema(result.getMessage());
+ }
+ }
+
+ /**
+ * Serialize Arrow schema into byte array.
+ */
+ public static byte[] serialize(Schema schema) throws IOException {
+ final ByteArrayOutputStream out = new ByteArrayOutputStream();
+ MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), schema);
+ return out.toByteArray();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
new file mode 100644
index 000000000..b479305c6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
@@ -0,0 +1,688 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.io.DataInput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.MalformedInputException;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.Arrays;
+
+import com.fasterxml.jackson.core.JsonGenerationException;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+import com.fasterxml.jackson.databind.ser.std.StdSerializer;
+
+/**
+ * A simplified byte wrapper similar to Hadoop's Text class without all the dependencies.
+ * Lifted from Hadoop 2.7.1
+ */
+@JsonSerialize(using = Text.TextSerializer.class)
+public class Text {
+
+ private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
+ new ThreadLocal<CharsetEncoder>() {
+ @Override
+ protected CharsetEncoder initialValue() {
+ return Charset.forName("UTF-8").newEncoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ };
+
+ private static ThreadLocal<CharsetDecoder> DECODER_FACTORY =
+ new ThreadLocal<CharsetDecoder>() {
+ @Override
+ protected CharsetDecoder initialValue() {
+ return Charset.forName("UTF-8").newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ };
+
+ private static final byte[] EMPTY_BYTES = new byte[0];
+
+ private byte[] bytes;
+ private int length;
+
+ public Text() {
+ bytes = EMPTY_BYTES;
+ }
+
+ /**
+ * Construct from a string.
+ *
+ * @param string initialize from that string
+ */
+ public Text(String string) {
+ set(string);
+ }
+
+ /**
+ * Construct from another text.
+ *
+ * @param utf8 initialize from that Text
+ */
+ public Text(Text utf8) {
+ set(utf8);
+ }
+
+ /**
+ * Construct from a byte array.
+ *
+ * @param utf8 initialize from that byte array
+ */
+ public Text(byte[] utf8) {
+ set(utf8);
+ }
+
+ /**
+ * Get a copy of the bytes that is exactly the length of the data. See {@link #getBytes()} for
+ * faster access to the underlying array.
+ *
+ * @return a copy of the underlying array
+ */
+ public byte[] copyBytes() {
+ byte[] result = new byte[length];
+ System.arraycopy(bytes, 0, result, 0, length);
+ return result;
+ }
+
+ /**
+ * Returns the raw bytes; however, only data up to {@link #getLength()} is valid. Please use
+ * {@link #copyBytes()} if you need the returned array to be precisely the length of the data.
+ *
+ * @return the underlying array
+ */
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+ /**
+ * Get the number of bytes in the byte array.
+ *
+ * @return the number of bytes in the byte array
+ */
+ public int getLength() {
+ return length;
+ }
+
+ /**
+ * Returns the Unicode Scalar Value (32-bit integer value) for the character at
+ * <code>position</code>. Note that this method avoids using the converter or doing String
+ * instantiation.
+ *
+ * @param position the index of the char we want to retrieve
+ * @return the Unicode scalar value at position or -1 if the position is invalid or points to a
+ * trailing byte
+ */
+ public int charAt(int position) {
+ if (position > this.length) {
+ return -1; // too long
+ }
+ if (position < 0) {
+ return -1; // duh.
+ }
+
+ ByteBuffer bb = (ByteBuffer) ByteBuffer.wrap(bytes).position(position);
+ return bytesToCodePoint(bb.slice());
+ }
+
+ public int find(String what) {
+ return find(what, 0);
+ }
+
+ /**
+ * Finds any occurrence of <code>what</code> in the backing buffer, starting as position
+ * <code>start</code>. The starting position is measured in bytes and the return value is in terms
+ * of byte position in the buffer. The backing buffer is not converted to a string for this
+ * operation.
+ *
+ * @param what the string to search for
+ * @param start where to start from
+ * @return byte position of the first occurrence of the search string in the UTF-8 buffer or -1
+ * if not found
+ */
+ public int find(String what, int start) {
+ try {
+ ByteBuffer src = ByteBuffer.wrap(this.bytes, 0, this.length);
+ ByteBuffer tgt = encode(what);
+ byte b = tgt.get();
+ src.position(start);
+
+ while (src.hasRemaining()) {
+ if (b == src.get()) { // matching first byte
+ src.mark(); // save position in loop
+ tgt.mark(); // save position in target
+ boolean found = true;
+ int pos = src.position() - 1;
+ while (tgt.hasRemaining()) {
+ if (!src.hasRemaining()) { // src expired first
+ tgt.reset();
+ src.reset();
+ found = false;
+ break;
+ }
+ if (!(tgt.get() == src.get())) {
+ tgt.reset();
+ src.reset();
+ found = false;
+ break; // no match
+ }
+ }
+ if (found) {
+ return pos;
+ }
+ }
+ }
+ return -1; // not found
+ } catch (CharacterCodingException e) {
+ // can't get here
+ e.printStackTrace();
+ return -1;
+ }
+ }
+
+ /**
+ * Set to contain the contents of a string.
+ *
+ * @param string the string to initialize from
+ */
+ public void set(String string) {
+ try {
+ ByteBuffer bb = encode(string, true);
+ bytes = bb.array();
+ length = bb.limit();
+ } catch (CharacterCodingException e) {
+ throw new RuntimeException("Should not have happened ", e);
+ }
+ }
+
+ /**
+ * Set to a utf8 byte array.
+ *
+ * @param utf8 the byte array to initialize from
+ */
+ public void set(byte[] utf8) {
+ set(utf8, 0, utf8.length);
+ }
+
+ /**
+ * copy a text.
+ *
+ * @param other the text to initialize from
+ */
+ public void set(Text other) {
+ set(other.getBytes(), 0, other.getLength());
+ }
+
+ /**
+ * Set the Text to range of bytes.
+ *
+ * @param utf8 the data to copy from
+ * @param start the first position of the new string
+ * @param len the number of bytes of the new string
+ */
+ public void set(byte[] utf8, int start, int len) {
+ setCapacity(len, false);
+ System.arraycopy(utf8, start, bytes, 0, len);
+ this.length = len;
+ }
+
+ /**
+ * Append a range of bytes to the end of the given text.
+ *
+ * @param utf8 the data to copy from
+ * @param start the first position to append from utf8
+ * @param len the number of bytes to append
+ */
+ public void append(byte[] utf8, int start, int len) {
+ setCapacity(length + len, true);
+ System.arraycopy(utf8, start, bytes, length, len);
+ length += len;
+ }
+
+ /**
+ * Clear the string to empty.
+ *
+ * <em>Note</em>: For performance reasons, this call does not clear the underlying byte array that
+ * is retrievable via {@link #getBytes()}. In order to free the byte-array memory, call
+ * {@link #set(byte[])} with an empty byte array (For example, <code>new byte[0]</code>).
+ */
+ public void clear() {
+ length = 0;
+ }
+
+ /**
+ * Sets the capacity of this Text object to <em>at least</em> <code>len</code> bytes. If the
+ * current buffer is longer, then the capacity and existing content of the buffer are unchanged.
+ * If <code>len</code> is larger than the current capacity, the Text object's capacity is
+ * increased to match.
+ *
+ * @param len the number of bytes we need
+ * @param keepData should the old data be kept
+ */
+ private void setCapacity(int len, boolean keepData) {
+ if (bytes == null || bytes.length < len) {
+ if (bytes != null && keepData) {
+ bytes = Arrays.copyOf(bytes, Math.max(len, length << 1));
+ } else {
+ bytes = new byte[len];
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ try {
+ return decode(bytes, 0, length);
+ } catch (CharacterCodingException e) {
+ throw new RuntimeException("Should not have happened ", e);
+ }
+ }
+
+ /**
+ * Read a Text object whose length is already known. This allows creating Text from a stream which
+ * uses a different serialization format.
+ *
+ * @param in the input to initialize from
+ * @param len how many bytes to read from in
+ * @throws IOException if something bad happens
+ */
+ public void readWithKnownLength(DataInput in, int len) throws IOException {
+ setCapacity(len, false);
+ in.readFully(bytes, 0, len);
+ length = len;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) {
+ return true;
+ } else if (o == null) {
+ return false;
+ }
+ if (!(o instanceof Text)) {
+ return false;
+ }
+
+ final Text that = (Text) o;
+ if (this.getLength() != that.getLength()) {
+ return false;
+ }
+
+ // copied from Arrays.equals so we don'thave to copy the byte arrays
+ for (int i = 0; i < length; i++) {
+ if (bytes[i] != that.bytes[i]) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * Copied from Arrays.hashCode so we don't have to copy the byte array.
+ *
+ * @return hashCode
+ */
+ @Override
+ public int hashCode() {
+ if (bytes == null) {
+ return 0;
+ }
+
+ int result = 1;
+ for (int i = 0; i < length; i++) {
+ result = 31 * result + bytes[i];
+ }
+
+ return result;
+ }
+
+ // / STATIC UTILITIES FROM HERE DOWN
+
+ /**
+ * Converts the provided byte array to a String using the UTF-8 encoding. If the input is
+ * malformed, replace by a default value.
+ *
+ * @param utf8 bytes to decode
+ * @return the decoded string
+ * @throws CharacterCodingException if this is not valid UTF-8
+ */
+ public static String decode(byte[] utf8) throws CharacterCodingException {
+ return decode(ByteBuffer.wrap(utf8), true);
+ }
+
+ public static String decode(byte[] utf8, int start, int length)
+ throws CharacterCodingException {
+ return decode(ByteBuffer.wrap(utf8, start, length), true);
+ }
+
+ /**
+ * Converts the provided byte array to a String using the UTF-8 encoding. If <code>replace</code>
+ * is true, then malformed input is replaced with the substitution character, which is U+FFFD.
+ * Otherwise the method throws a MalformedInputException.
+ *
+ * @param utf8 the bytes to decode
+ * @param start where to start from
+ * @param length length of the bytes to decode
+ * @param replace whether to replace malformed characters with U+FFFD
+ * @return the decoded string
+ * @throws CharacterCodingException if the input could not be decoded
+ */
+ public static String decode(byte[] utf8, int start, int length, boolean replace)
+ throws CharacterCodingException {
+ return decode(ByteBuffer.wrap(utf8, start, length), replace);
+ }
+
+ private static String decode(ByteBuffer utf8, boolean replace)
+ throws CharacterCodingException {
+ CharsetDecoder decoder = DECODER_FACTORY.get();
+ if (replace) {
+ decoder.onMalformedInput(
+ java.nio.charset.CodingErrorAction.REPLACE);
+ decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ }
+ String str = decoder.decode(utf8).toString();
+ // set decoder back to its default value: REPORT
+ if (replace) {
+ decoder.onMalformedInput(CodingErrorAction.REPORT);
+ decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ return str;
+ }
+
+ /**
+ * Converts the provided String to bytes using the UTF-8 encoding. If the input is malformed,
+ * invalid chars are replaced by a default value.
+ *
+ * @param string the string to encode
+ * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit()
+ * @throws CharacterCodingException if the string could not be encoded
+ */
+ public static ByteBuffer encode(String string)
+ throws CharacterCodingException {
+ return encode(string, true);
+ }
+
+ /**
+ * Converts the provided String to bytes using the UTF-8 encoding. If <code>replace</code> is
+ * true, then malformed input is replaced with the substitution character, which is U+FFFD.
+ * Otherwise the method throws a MalformedInputException.
+ *
+ * @param string the string to encode
+ * @param replace whether to replace malformed characters with U+FFFD
+ * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit()
+ * @throws CharacterCodingException if the string could not be encoded
+ */
+ public static ByteBuffer encode(String string, boolean replace)
+ throws CharacterCodingException {
+ CharsetEncoder encoder = ENCODER_FACTORY.get();
+ if (replace) {
+ encoder.onMalformedInput(CodingErrorAction.REPLACE);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ }
+ ByteBuffer bytes =
+ encoder.encode(CharBuffer.wrap(string.toCharArray()));
+ if (replace) {
+ encoder.onMalformedInput(CodingErrorAction.REPORT);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ return bytes;
+ }
+
+ public static final int DEFAULT_MAX_LEN = 1024 * 1024;
+
+ // //// states for validateUTF8
+
+ private static final int LEAD_BYTE = 0;
+
+ private static final int TRAIL_BYTE_1 = 1;
+
+ private static final int TRAIL_BYTE = 2;
+
+ /**
+ * Check if a byte array contains valid utf-8.
+ *
+ * @param utf8 byte array
+ * @throws MalformedInputException if the byte array contains invalid utf-8
+ */
+ public static void validateUTF8(byte[] utf8) throws MalformedInputException {
+ validateUTF8(utf8, 0, utf8.length);
+ }
+
+ /**
+ * Check to see if a byte array is valid utf-8.
+ *
+ * @param utf8 the array of bytes
+ * @param start the offset of the first byte in the array
+ * @param len the length of the byte sequence
+ * @throws MalformedInputException if the byte array contains invalid bytes
+ */
+ public static void validateUTF8(byte[] utf8, int start, int len)
+ throws MalformedInputException {
+ int count = start;
+ int leadByte = 0;
+ int length = 0;
+ int state = LEAD_BYTE;
+ while (count < start + len) {
+ int aByte = utf8[count] & 0xFF;
+
+ switch (state) {
+ case LEAD_BYTE:
+ leadByte = aByte;
+ length = bytesFromUTF8[aByte];
+
+ switch (length) {
+ case 0: // check for ASCII
+ if (leadByte > 0x7F) {
+ throw new MalformedInputException(count);
+ }
+ break;
+ case 1:
+ if (leadByte < 0xC2 || leadByte > 0xDF) {
+ throw new MalformedInputException(count);
+ }
+ state = TRAIL_BYTE_1;
+ break;
+ case 2:
+ if (leadByte < 0xE0 || leadByte > 0xEF) {
+ throw new MalformedInputException(count);
+ }
+ state = TRAIL_BYTE_1;
+ break;
+ case 3:
+ if (leadByte < 0xF0 || leadByte > 0xF4) {
+ throw new MalformedInputException(count);
+ }
+ state = TRAIL_BYTE_1;
+ break;
+ default:
+ // too long! Longest valid UTF-8 is 4 bytes (lead + three)
+ // or if < 0 we got a trail byte in the lead byte position
+ throw new MalformedInputException(count);
+ } // switch (length)
+ break;
+
+ case TRAIL_BYTE_1:
+ if (leadByte == 0xF0 && aByte < 0x90) {
+ throw new MalformedInputException(count);
+ }
+ if (leadByte == 0xF4 && aByte > 0x8F) {
+ throw new MalformedInputException(count);
+ }
+ if (leadByte == 0xE0 && aByte < 0xA0) {
+ throw new MalformedInputException(count);
+ }
+ if (leadByte == 0xED && aByte > 0x9F) {
+ throw new MalformedInputException(count);
+ }
+ // falls through to regular trail-byte test!!
+ case TRAIL_BYTE:
+ if (aByte < 0x80 || aByte > 0xBF) {
+ throw new MalformedInputException(count);
+ }
+ if (--length == 0) {
+ state = LEAD_BYTE;
+ } else {
+ state = TRAIL_BYTE;
+ }
+ break;
+ default:
+ break;
+ } // switch (state)
+ count++;
+ }
+ }
+
+ /**
+ * Magic numbers for UTF-8. These are the number of bytes that <em>follow</em> a given lead byte.
+ * Trailing bytes have the value -1. The values 4 and 5 are presented in this table, even though
+ * valid UTF-8 cannot include the five and six byte sequences.
+ */
+ static final int[] bytesFromUTF8 =
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ // trail bytes
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
+ 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5};
+
+ /**
+ * Returns the next code point at the current position in the buffer. The buffer's position will
+ * be incremented. Any mark set on this buffer will be changed by this method!
+ *
+ * @param bytes the incoming bytes
+ * @return the corresponding unicode codepoint
+ */
+ public static int bytesToCodePoint(ByteBuffer bytes) {
+ bytes.mark();
+ byte b = bytes.get();
+ bytes.reset();
+ int extraBytesToRead = bytesFromUTF8[(b & 0xFF)];
+ if (extraBytesToRead < 0) {
+ return -1; // trailing byte!
+ }
+ int ch = 0;
+
+ switch (extraBytesToRead) {
+ case 5:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6; /* remember, illegal UTF-8 */
+ // fall through
+ case 4:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6; /* remember, illegal UTF-8 */
+ // fall through
+ case 3:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6;
+ // fall through
+ case 2:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6;
+ // fall through
+ case 1:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6;
+ // fall through
+ case 0:
+ ch += (bytes.get() & 0xFF);
+ break;
+ default: // do nothing
+ }
+ ch -= offsetsFromUTF8[extraBytesToRead];
+
+ return ch;
+ }
+
+ static final int[] offsetsFromUTF8 =
+ {0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080};
+
+ /**
+ * For the given string, returns the number of UTF-8 bytes required to encode the string.
+ *
+ * @param string text to encode
+ * @return number of UTF-8 bytes required to encode
+ */
+ public static int utf8Length(String string) {
+ CharacterIterator iter = new StringCharacterIterator(string);
+ char ch = iter.first();
+ int size = 0;
+ while (ch != CharacterIterator.DONE) {
+ if ((ch >= 0xD800) && (ch < 0xDC00)) {
+ // surrogate pair?
+ char trail = iter.next();
+ if ((trail > 0xDBFF) && (trail < 0xE000)) {
+ // valid pair
+ size += 4;
+ } else {
+ // invalid pair
+ size += 3;
+ iter.previous(); // rewind one
+ }
+ } else if (ch < 0x80) {
+ size++;
+ } else if (ch < 0x800) {
+ size += 2;
+ } else {
+ // ch < 0x10000, that is, the largest char value
+ size += 3;
+ }
+ ch = iter.next();
+ }
+ return size;
+ }
+
+ /**
+ * JSON serializer for {@link Text}.
+ */
+ public static class TextSerializer extends StdSerializer<Text> {
+
+ public TextSerializer() {
+ super(Text.class);
+ }
+
+ @Override
+ public void serialize(
+ Text text,
+ JsonGenerator jsonGenerator,
+ SerializerProvider serializerProvider) throws IOException, JsonGenerationException {
+ jsonGenerator.writeString(text.toString());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java
new file mode 100644
index 000000000..ca3876c7b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Interface for copying values between a pair of two vectors of the same type.
+ */
+public interface TransferPair {
+ void transfer();
+
+ void splitAndTransfer(int startIndex, int length);
+
+ ValueVector getTo();
+
+ void copyValueSafe(int from, int to);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java
new file mode 100644
index 000000000..741972b4a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Utility class for validating arrow data structures.
+ */
+public class Validator {
+
+ /**
+ * Validate two arrow schemas are equal.
+ *
+ * @param schema1 the 1st schema to compare
+ * @param schema2 the 2nd schema to compare
+ * @throws IllegalArgumentException if they are different.
+ */
+ public static void compareSchemas(Schema schema1, Schema schema2) {
+ if (!schema2.equals(schema1)) {
+ throw new IllegalArgumentException("Different schemas:\n" + schema2 + "\n" + schema1);
+ }
+ }
+
+ /**
+ * Validate two Dictionary encodings and dictionaries with id's from the encodings.
+ */
+ public static void compareDictionaries(
+ List<DictionaryEncoding> encodings1,
+ List<DictionaryEncoding> encodings2,
+ DictionaryProvider provider1,
+ DictionaryProvider provider2) {
+
+ if (encodings1.size() != encodings2.size()) {
+ throw new IllegalArgumentException("Different dictionary encoding count:\n" +
+ encodings1.size() + "\n" + encodings2.size());
+ }
+
+ for (int i = 0; i < encodings1.size(); i++) {
+ if (!encodings1.get(i).equals(encodings2.get(i))) {
+ throw new IllegalArgumentException("Different dictionary encodings:\n" + encodings1.get(i) +
+ "\n" + encodings2.get(i));
+ }
+
+ long id = encodings1.get(i).getId();
+ Dictionary dict1 = provider1.lookup(id);
+ Dictionary dict2 = provider2.lookup(id);
+
+ if (dict1 == null || dict2 == null) {
+ throw new IllegalArgumentException("The DictionaryProvider did not contain the required " +
+ "dictionary with id: " + id + "\n" + dict1 + "\n" + dict2);
+ }
+
+ try {
+ compareFieldVectors(dict1.getVector(), dict2.getVector());
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("Different dictionaries:\n" + dict1 + "\n" + dict2, e);
+ }
+ }
+ }
+
+ /**
+ * Validate two arrow vectorSchemaRoot are equal.
+ *
+ * @param root1 the 1st schema to compare
+ * @param root2 the 2nd schema to compare
+ * @throws IllegalArgumentException if they are different.
+ */
+ public static void compareVectorSchemaRoot(VectorSchemaRoot root1, VectorSchemaRoot root2) {
+ compareSchemas(root2.getSchema(), root1.getSchema());
+ if (root1.getRowCount() != root2.getRowCount()) {
+ throw new IllegalArgumentException("Different row count:\n" + root1.getRowCount() + " != " + root2.getRowCount());
+ }
+ List<FieldVector> vectors1 = root1.getFieldVectors();
+ List<FieldVector> vectors2 = root2.getFieldVectors();
+ if (vectors1.size() != vectors2.size()) {
+ throw new IllegalArgumentException("Different column count:\n" + vectors1.toString() +
+ "\n!=\n" + vectors2.toString());
+ }
+ for (int i = 0; i < vectors1.size(); i++) {
+ compareFieldVectors(vectors1.get(i), vectors2.get(i));
+ }
+ }
+
+ /**
+ * Validate two arrow FieldVectors are equal.
+ *
+ * @param vector1 the 1st VectorField to compare
+ * @param vector2 the 2nd VectorField to compare
+ * @throws IllegalArgumentException if they are different
+ */
+ public static void compareFieldVectors(FieldVector vector1, FieldVector vector2) {
+ Field field1 = vector1.getField();
+ if (!field1.equals(vector2.getField())) {
+ throw new IllegalArgumentException("Different Fields:\n" + field1 + "\n!=\n" +
+ vector2.getField());
+ }
+ int valueCount = vector1.getValueCount();
+ if (valueCount != vector2.getValueCount()) {
+ throw new IllegalArgumentException("Different value count for field " + field1 + " : " +
+ valueCount + " != " + vector2.getValueCount());
+ }
+ for (int j = 0; j < valueCount; j++) {
+ Object obj1 = vector1.getObject(j);
+ Object obj2 = vector2.getObject(j);
+ if (!equals(field1.getType(), obj1, obj2)) {
+ throw new IllegalArgumentException(
+ "Different values in column:\n" + field1 + " at index " + j + ": " + obj1 + " != " + obj2);
+ }
+ }
+ }
+
+ static boolean equals(ArrowType type, final Object o1, final Object o2) {
+ if (type instanceof ArrowType.FloatingPoint) {
+ ArrowType.FloatingPoint fpType = (ArrowType.FloatingPoint) type;
+ switch (fpType.getPrecision()) {
+ case DOUBLE:
+ return equalEnough((Double) o1, (Double) o2);
+ case SINGLE:
+ return equalEnough((Float) o1, (Float) o2);
+ case HALF:
+ default:
+ throw new UnsupportedOperationException("unsupported precision: " + fpType);
+ }
+ } else if (type instanceof ArrowType.Binary || type instanceof ArrowType.LargeBinary ||
+ type instanceof ArrowType.FixedSizeBinary) {
+ return Arrays.equals((byte[]) o1, (byte[]) o2);
+ } else if (o1 instanceof byte[] && o2 instanceof byte[]) {
+ return Arrays.equals((byte[]) o1, (byte[]) o2);
+ }
+
+ return Objects.equals(o1, o2);
+ }
+
+ static boolean equalEnough(Float f1, Float f2) {
+ if (f1 == null || f2 == null) {
+ return f1 == null && f2 == null;
+ }
+ if (f1.isNaN()) {
+ return f2.isNaN();
+ }
+ if (f1.isInfinite()) {
+ return f2.isInfinite() && Math.signum(f1) == Math.signum(f2);
+ }
+ float average = Math.abs((f1 + f2) / 2);
+ float differenceScaled = Math.abs(f1 - f2) / (average == 0.0f ? 1f : average);
+ return differenceScaled < 1.0E-6f;
+ }
+
+ static boolean equalEnough(Double f1, Double f2) {
+ if (f1 == null || f2 == null) {
+ return f1 == null && f2 == null;
+ }
+ if (f1.isNaN()) {
+ return f2.isNaN();
+ }
+ if (f1.isInfinite()) {
+ return f2.isInfinite() && Math.signum(f1) == Math.signum(f2);
+ }
+ double average = Math.abs((f1 + f2) / 2);
+ double differenceScaled = Math.abs(f1 - f2) / (average == 0.0d ? 1d : average);
+ return differenceScaled < 1.0E-12d;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java
new file mode 100644
index 000000000..ceb7081e1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
+
+import java.util.function.BiFunction;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.validate.ValidateVectorBufferVisitor;
+import org.apache.arrow.vector.validate.ValidateVectorDataVisitor;
+import org.apache.arrow.vector.validate.ValidateVectorTypeVisitor;
+
+/**
+ * Utility methods for {@link ValueVector}.
+ */
+public class ValueVectorUtility {
+
+ private ValueVectorUtility() {
+ }
+
+ /**
+ * Get the toString() representation of vector suitable for debugging.
+ * Note since vectors may have millions of values, this method only shows max 20 values.
+ * Examples as below (v represents value):
+ * <li>
+ * vector with 0 value:
+ * []
+ * </li>
+ * <li>
+ * vector with 5 values (no more than 20 values):
+ * [v0, v1, v2, v3, v4]
+ * </li>
+ * <li>
+ * vector with 100 values (more than 20 values):
+ * [v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, ..., v90, v91, v92, v93, v94, v95, v96, v97, v98, v99]
+ * </li>
+ */
+ public static <V extends ValueVector> String getToString(V vector, int start, int end) {
+ return getToString(vector, start, end, (v, i) -> v.getObject(i));
+ }
+
+ /**
+ * Get the toString() representation of vector suitable for debugging.
+ * Note since vectors may have millions of values, this method only shows at most 20 values.
+ * @param vector the vector for which to get toString representation.
+ * @param start the starting index, inclusive.
+ * @param end the end index, exclusive.
+ * @param valueToString the function to transform individual elements to strings.
+ */
+ public static <V extends ValueVector> String getToString(
+ V vector, int start, int end, BiFunction<V, Integer, Object> valueToString) {
+ Preconditions.checkNotNull(vector);
+ final int length = end - start;
+ Preconditions.checkArgument(length >= 0);
+ Preconditions.checkArgument(start >= 0);
+ Preconditions.checkArgument(end <= vector.getValueCount());
+
+ if (length == 0) {
+ return "[]";
+ }
+
+ final int window = 10;
+ boolean skipComma = false;
+
+ StringBuilder sb = new StringBuilder();
+ sb.append('[');
+ for (int i = start; i < end; i++) {
+ if (skipComma) {
+ skipComma = false;
+ }
+ if (i - start >= window && i < end - window) {
+ sb.append("...");
+ i = end - window - 1;
+ skipComma = true;
+ } else {
+ sb.append(valueToString.apply(vector, i));
+ }
+
+ if (i == end - 1) {
+ sb.append(']');
+ } else {
+ if (!skipComma) {
+ sb.append(',');
+ }
+ sb.append(' ');
+ }
+ }
+
+ return sb.toString();
+ }
+
+ /**
+ * Utility to validate vector in O(1) time.
+ */
+ public static void validate(ValueVector vector) {
+ Preconditions.checkNotNull(vector);
+
+ ValidateVectorTypeVisitor typeVisitor = new ValidateVectorTypeVisitor();
+ vector.accept(typeVisitor, null);
+
+ ValidateVectorBufferVisitor bufferVisitor = new ValidateVectorBufferVisitor();
+ vector.accept(bufferVisitor, null);
+ }
+
+ /**
+ * Utility to validate vector in O(n) time, where n is the value count.
+ */
+ public static void validateFull(ValueVector vector) {
+ validate(vector);
+
+ ValidateVectorDataVisitor dataVisitor = new ValidateVectorDataVisitor();
+ vector.accept(dataVisitor, null);
+ }
+
+ /**
+ * Utility to validate vector schema root in O(1) time.
+ */
+ public static void validate(VectorSchemaRoot root) {
+ Preconditions.checkNotNull(root);
+ int valueCount = root.getRowCount();
+ validateOrThrow(valueCount >= 0, "The row count of vector schema root %s is negative.", valueCount);
+ for (ValueVector childVec : root.getFieldVectors()) {
+ validateOrThrow(valueCount == childVec.getValueCount(),
+ "Child vector and vector schema root have different value counts. " +
+ "Child vector value count %s, vector schema root value count %s", childVec.getValueCount(), valueCount);
+ validate(childVec);
+ }
+ }
+
+ /**
+ * Utility to validate vector in O(n) time, where n is the value count.
+ */
+ public static void validateFull(VectorSchemaRoot root) {
+ Preconditions.checkNotNull(root);
+ int valueCount = root.getRowCount();
+ validateOrThrow(valueCount >= 0, "The row count of vector schema root %s is negative.", valueCount);
+ for (ValueVector childVec : root.getFieldVectors()) {
+ validateOrThrow(valueCount == childVec.getValueCount(),
+ "Child vector and vector schema root have different value counts. " +
+ "Child vector value count %s, vector schema root value count %s", childVec.getValueCount(), valueCount);
+ validateFull(childVec);
+ }
+ }
+
+ /**
+ * Pre allocate memory for BaseFixedWidthVector.
+ */
+ public static void preAllocate(VectorSchemaRoot root, int targetSize) {
+ for (ValueVector vector : root.getFieldVectors()) {
+ if (vector instanceof BaseFixedWidthVector) {
+ ((BaseFixedWidthVector) vector).allocateNew(targetSize);
+ }
+ }
+ }
+
+ /**
+ * Ensure capacity for BaseFixedWidthVector.
+ */
+ public static void ensureCapacity(VectorSchemaRoot root, int targetCapacity) {
+ for (ValueVector vector : root.getFieldVectors()) {
+ if (vector instanceof BaseFixedWidthVector) {
+ while (vector.getValueCapacity() < targetCapacity) {
+ vector.reAlloc();
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
new file mode 100644
index 000000000..e5809e93e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
@@ -0,0 +1,542 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import java.util.HashSet;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Utility to append two vectors together.
+ */
+class VectorAppender implements VectorVisitor<ValueVector, Void> {
+
+ /**
+ * The targetVector to be appended.
+ */
+ private final ValueVector targetVector;
+
+ private final TypeEqualsVisitor typeVisitor;
+
+ /**
+ * Constructs a new targetVector appender, with the given targetVector.
+ * @param targetVector the targetVector to be appended.
+ */
+ VectorAppender(ValueVector targetVector) {
+ this.targetVector = targetVector;
+ typeVisitor = new TypeEqualsVisitor(targetVector, false, true);
+ }
+
+ @Override
+ public ValueVector visit(BaseFixedWidthVector deltaVector, Void value) {
+ Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append data buffer
+ PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(),
+ targetVector.getDataBuffer().memoryAddress() + deltaVector.getTypeWidth() * targetVector.getValueCount(),
+ deltaVector.getTypeWidth() * deltaVector.getValueCount());
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(BaseVariableWidthVector deltaVector, Void value) {
+ Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ int targetDataSize = targetVector.getOffsetBuffer().getInt(
+ (long) targetVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+ int deltaDataSize = deltaVector.getOffsetBuffer().getInt(
+ (long) deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+ int newValueCapacity = targetDataSize + deltaDataSize;
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+ while (targetVector.getDataBuffer().capacity() < newValueCapacity) {
+ ((BaseVariableWidthVector) targetVector).reallocDataBuffer();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append data buffer
+ PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(),
+ targetVector.getDataBuffer().memoryAddress() + targetDataSize, deltaDataSize);
+
+ // copy offset buffer
+ PlatformDependent.copyMemory(
+ deltaVector.getOffsetBuffer().memoryAddress() + BaseVariableWidthVector.OFFSET_WIDTH,
+ targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
+ BaseVariableWidthVector.OFFSET_WIDTH,
+ deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+
+ // increase each offset from the second buffer
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ int oldOffset = targetVector.getOffsetBuffer().getInt((long) (targetVector.getValueCount() + 1 + i) *
+ BaseVariableWidthVector.OFFSET_WIDTH);
+ targetVector.getOffsetBuffer().setInt(
+ (long) (targetVector.getValueCount() + 1 + i) *
+ BaseVariableWidthVector.OFFSET_WIDTH, oldOffset + targetDataSize);
+ }
+ ((BaseVariableWidthVector) targetVector).setLastSet(newValueCount - 1);
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) {
+ Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ long targetDataSize = targetVector.getOffsetBuffer().getLong(
+ (long) targetVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ long deltaDataSize = deltaVector.getOffsetBuffer().getLong(
+ (long) deltaVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ long newValueCapacity = targetDataSize + deltaDataSize;
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+ while (targetVector.getDataBuffer().capacity() < newValueCapacity) {
+ ((BaseLargeVariableWidthVector) targetVector).reallocDataBuffer();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append data buffer
+ PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(),
+ targetVector.getDataBuffer().memoryAddress() + targetDataSize, deltaDataSize);
+
+ // copy offset buffer
+ PlatformDependent.copyMemory(
+ deltaVector.getOffsetBuffer().memoryAddress() + BaseLargeVariableWidthVector.OFFSET_WIDTH,
+ targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
+ BaseLargeVariableWidthVector.OFFSET_WIDTH,
+ deltaVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+
+ // increase each offset from the second buffer
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ long oldOffset = targetVector.getOffsetBuffer().getLong((long) (targetVector.getValueCount() + 1 + i) *
+ BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ targetVector.getOffsetBuffer().setLong(
+ (long) (targetVector.getValueCount() + 1 + i) *
+ BaseLargeVariableWidthVector.OFFSET_WIDTH, oldOffset + targetDataSize);
+ }
+ ((BaseLargeVariableWidthVector) targetVector).setLastSet(newValueCount - 1);
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(ListVector deltaVector, Void value) {
+ Preconditions.checkArgument(typeVisitor.equals(deltaVector),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ int targetListSize = targetVector.getOffsetBuffer().getInt(
+ (long) targetVector.getValueCount() * ListVector.OFFSET_WIDTH);
+ int deltaListSize = deltaVector.getOffsetBuffer().getInt(
+ (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH);
+
+ ListVector targetListVector = (ListVector) targetVector;
+
+ // make sure the underlying vector has value count set
+ targetListVector.getDataVector().setValueCount(targetListSize);
+ deltaVector.getDataVector().setValueCount(deltaListSize);
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append offset buffer
+ PlatformDependent.copyMemory(deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH,
+ targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
+ ListVector.OFFSET_WIDTH,
+ (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH);
+
+ // increase each offset from the second buffer
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ int oldOffset = targetVector.getOffsetBuffer().getInt(
+ (long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH);
+ targetVector.getOffsetBuffer().setInt((long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH,
+ oldOffset + targetListSize);
+ }
+ targetListVector.setLastSet(newValueCount - 1);
+
+ // append underlying vectors
+ VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector());
+ deltaVector.getDataVector().accept(innerAppender, null);
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(LargeListVector deltaVector, Void value) {
+ Preconditions.checkArgument(typeVisitor.equals(deltaVector),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ long targetListSize = targetVector.getOffsetBuffer().getLong(
+ (long) targetVector.getValueCount() * LargeListVector.OFFSET_WIDTH);
+ long deltaListSize = deltaVector.getOffsetBuffer().getLong(
+ (long) deltaVector.getValueCount() * LargeListVector.OFFSET_WIDTH);
+
+ ListVector targetListVector = (ListVector) targetVector;
+
+ // make sure the underlying vector has value count set
+ // todo recheck these casts when int64 vectors are supported
+ targetListVector.getDataVector().setValueCount(checkedCastToInt(targetListSize));
+ deltaVector.getDataVector().setValueCount(checkedCastToInt(deltaListSize));
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append offset buffer
+ PlatformDependent.copyMemory(deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH,
+ targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
+ LargeListVector.OFFSET_WIDTH,
+ (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH);
+
+ // increase each offset from the second buffer
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ long oldOffset = targetVector.getOffsetBuffer().getLong(
+ (long) (targetVector.getValueCount() + 1 + i) * LargeListVector.OFFSET_WIDTH);
+ targetVector.getOffsetBuffer().setLong((long) (targetVector.getValueCount() + 1 + i) *
+ LargeListVector.OFFSET_WIDTH, oldOffset + targetListSize);
+ }
+ targetListVector.setLastSet(newValueCount - 1);
+
+ // append underlying vectors
+ VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector());
+ deltaVector.getDataVector().accept(innerAppender, null);
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(FixedSizeListVector deltaVector, Void value) {
+ Preconditions.checkArgument(typeVisitor.equals(deltaVector),
+ "The vector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ FixedSizeListVector targetListVector = (FixedSizeListVector) targetVector;
+
+ Preconditions.checkArgument(targetListVector.getListSize() == deltaVector.getListSize(),
+ "FixedSizeListVector must have the same list size to append");
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ int targetListSize = targetListVector.getValueCount() * targetListVector.getListSize();
+ int deltaListSize = deltaVector.getValueCount() * deltaVector.getListSize();
+
+ // make sure the underlying vector has value count set
+ targetListVector.getDataVector().setValueCount(targetListSize);
+ deltaVector.getDataVector().setValueCount(deltaListSize);
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append underlying vectors
+ VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector());
+ deltaVector.getDataVector().accept(innerAppender, null);
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(NonNullableStructVector deltaVector, Void value) {
+ Preconditions.checkArgument(typeVisitor.equals(deltaVector),
+ "The vector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ NonNullableStructVector targetStructVector = (NonNullableStructVector) targetVector;
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append child vectors
+ for (int i = 0; i < targetStructVector.getChildrenFromFields().size(); i++) {
+ ValueVector targetChild = targetStructVector.getVectorById(i);
+ ValueVector deltaChild = deltaVector.getVectorById(i);
+
+ targetChild.setValueCount(targetStructVector.getValueCount());
+ deltaChild.setValueCount(deltaVector.getValueCount());
+
+ VectorAppender innerAppender = new VectorAppender(targetChild);
+ deltaChild.accept(innerAppender, null);
+ }
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(UnionVector deltaVector, Void value) {
+ // we only make sure that both vectors are union vectors.
+ Preconditions.checkArgument(targetVector.getMinorType() == deltaVector.getMinorType(),
+ "The vector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ UnionVector targetUnionVector = (UnionVector) targetVector;
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ // make sure there is enough capacity
+ while (targetUnionVector.getValueCapacity() < newValueCount) {
+ targetUnionVector.reAlloc();
+ }
+
+ // append type buffers
+ PlatformDependent.copyMemory(deltaVector.getTypeBufferAddress(),
+ targetUnionVector.getTypeBufferAddress() + targetVector.getValueCount(),
+ deltaVector.getValueCount());
+
+ // build the hash set for all types
+ HashSet<Integer> targetTypes = new HashSet<>();
+ for (int i = 0; i < targetUnionVector.getValueCount(); i++) {
+ targetTypes.add(targetUnionVector.getTypeValue(i));
+ }
+ HashSet<Integer> deltaTypes = new HashSet<>();
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ deltaTypes.add(deltaVector.getTypeValue(i));
+ }
+
+ // append child vectors
+ for (int i = 0; i < Byte.MAX_VALUE; i++) {
+ if (targetTypes.contains(i) || deltaTypes.contains(i)) {
+ ValueVector targetChild = targetUnionVector.getVectorByType(i);
+ if (!targetTypes.contains(i)) {
+ // if the vector type does not exist in the target, it must be newly created
+ // and we must make sure it has enough capacity.
+ while (targetChild.getValueCapacity() < newValueCount) {
+ targetChild.reAlloc();
+ }
+ }
+
+ if (deltaTypes.contains(i)) {
+ // append child vectors
+ ValueVector deltaChild = deltaVector.getVectorByType(i);
+
+ targetChild.setValueCount(targetUnionVector.getValueCount());
+ deltaChild.setValueCount(deltaVector.getValueCount());
+
+ VectorAppender innerAppender = new VectorAppender(targetChild);
+ deltaChild.accept(innerAppender, null);
+ }
+ targetChild.setValueCount(newValueCount);
+ }
+ }
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(DenseUnionVector deltaVector, Void value) {
+ // we only make sure that both vectors are union vectors.
+ Preconditions.checkArgument(targetVector.getMinorType() == deltaVector.getMinorType(),
+ "The vector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ DenseUnionVector targetDenseUnionVector = (DenseUnionVector) targetVector;
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ // make sure there is enough capacity
+ while (targetDenseUnionVector.getValueCapacity() < newValueCount) {
+ targetDenseUnionVector.reAlloc();
+ }
+
+ // append type buffers
+ PlatformDependent.copyMemory(deltaVector.getTypeBuffer().memoryAddress(),
+ targetDenseUnionVector.getTypeBuffer() .memoryAddress() + targetVector.getValueCount(),
+ deltaVector.getValueCount());
+
+ // append offset buffers
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ byte typeId = deltaVector.getTypeId(i);
+ ValueVector targetChildVector = targetDenseUnionVector.getVectorByType(typeId);
+ int offsetBase = targetChildVector == null ? 0 : targetChildVector.getValueCount();
+ int deltaOffset = deltaVector.getOffset(i);
+ long index = (long) (targetVector.getValueCount() + i) * DenseUnionVector.OFFSET_WIDTH;
+
+ targetVector.getOffsetBuffer().setInt(index, offsetBase + deltaOffset);
+ }
+
+ // append child vectors
+ for (int i = 0; i <= Byte.MAX_VALUE; i++) {
+ ValueVector targetChildVector = targetDenseUnionVector.getVectorByType((byte) i);
+ ValueVector deltaChildVector = deltaVector.getVectorByType((byte) i);
+
+ if (targetChildVector == null && deltaChildVector == null) {
+ // the type id is not registered in either vector, we are done.
+ continue;
+ } else if (targetChildVector == null && deltaChildVector != null) {
+ // first register a new child in the target vector
+ targetDenseUnionVector.registerNewTypeId(deltaChildVector.getField());
+ targetChildVector = targetDenseUnionVector.addVector(
+ (byte) i, deltaChildVector.getField().createVector(targetDenseUnionVector.getAllocator()));
+
+ // now we have both child vecors not null, we can append them.
+ VectorAppender childAppender = new VectorAppender(targetChildVector);
+ deltaChildVector.accept(childAppender, null);
+ } else if (targetChildVector != null && deltaChildVector == null) {
+ // the value only exists in the target vector, so we are done
+ continue;
+ } else {
+ // both child vectors are non-null
+
+ // first check vector types
+ TypeEqualsVisitor childTypeVisitor =
+ new TypeEqualsVisitor(targetChildVector, /* check name */ false, /* check meta data*/ false);
+ if (!childTypeVisitor.equals(deltaChildVector)) {
+ throw new IllegalArgumentException("dense union vectors have different child vector types with type id " + i);
+ }
+
+ // append child vectors
+ VectorAppender childAppender = new VectorAppender(targetChildVector);
+ deltaChildVector.accept(childAppender, null);
+ }
+ }
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(NullVector deltaVector, Void value) {
+ Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
+ "The targetVector to append must have the same type as the targetVector being appended");
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(ExtensionTypeVector<?> deltaVector, Void value) {
+ ValueVector targetUnderlying = ((ExtensionTypeVector<?>) targetVector).getUnderlyingVector();
+ VectorAppender underlyingAppender = new VectorAppender(targetUnderlying);
+ deltaVector.getUnderlyingVector().accept(underlyingAppender, null);
+ return targetVector;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java
new file mode 100644
index 000000000..570783d10
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Utility to add vector values in batch.
+ */
+public class VectorBatchAppender {
+
+ /**
+ * Add value vectors in batch.
+ * @param targetVector the target vector.
+ * @param vectorsToAppend the vectors to append.
+ * @param <V> the vector type.
+ */
+ public static <V extends ValueVector> void batchAppend(V targetVector, V... vectorsToAppend) {
+ VectorAppender appender = new VectorAppender(targetVector);
+ for (V delta : vectorsToAppend) {
+ delta.accept(appender, null);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java
new file mode 100644
index 000000000..3c6044ec5
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+
+/**
+ * Utility to append {@link org.apache.arrow.vector.VectorSchemaRoot}s with the same schema.
+ */
+public class VectorSchemaRootAppender {
+
+ /**
+ * Appends a number of {@link VectorSchemaRoot}s.
+ * @param checkSchema if we need to check schema for the vector schema roots.
+ * @param targetRoot the vector schema root to be appended.
+ * @param rootsToAppend the vector schema roots to append.
+ * @throws IllegalArgumentException throws if we need to check schema, and checking schema fails.
+ */
+ public static void append(boolean checkSchema, VectorSchemaRoot targetRoot, VectorSchemaRoot... rootsToAppend) {
+ // create appenders
+ VectorAppender[] appenders = new VectorAppender[targetRoot.getFieldVectors().size()];
+ for (int i = 0; i < appenders.length; i++) {
+ appenders[i] = new VectorAppender(targetRoot.getVector(i));
+ }
+
+ // create type checkers, if necessary
+ TypeEqualsVisitor[] typeCheckers = null;
+ if (checkSchema) {
+ typeCheckers = new TypeEqualsVisitor[targetRoot.getFieldVectors().size()];
+ for (int i = 0; i < typeCheckers.length; i++) {
+ typeCheckers[i] = new TypeEqualsVisitor(targetRoot.getVector(i),
+ /* check name */ false, /* check meta data */ false);
+ }
+ }
+
+ for (VectorSchemaRoot delta : rootsToAppend) {
+ // check schema, if necessary
+ if (checkSchema) {
+ if (delta.getFieldVectors().size() != targetRoot.getFieldVectors().size()) {
+ throw new IllegalArgumentException("Vector schema roots have different numbers of child vectors.");
+ }
+ for (int i = 0; i < typeCheckers.length; i++) {
+ if (!typeCheckers[i].equals(delta.getVector(i))) {
+ throw new IllegalArgumentException("Vector schema roots have different schemas.");
+ }
+ }
+ }
+
+ // append child vectors.
+ for (int i = 0; i < appenders.length; i++) {
+ delta.getVector(i).accept(appenders[i], null);
+ }
+ targetRoot.setRowCount(targetRoot.getRowCount() + delta.getRowCount());
+ }
+ }
+
+ /**
+ * Appends a number of {@link VectorSchemaRoot}s.
+ * This method performs schema checking before appending data.
+ * @param targetRoot the vector schema root to be appended.
+ * @param rootsToAppend the vector schema roots to append.
+ * @throws IllegalArgumentException throws if we need to check schema, and checking schema fails.
+ */
+ public static void append(VectorSchemaRoot targetRoot, VectorSchemaRoot... rootsToAppend) {
+ append(true, targetRoot, rootsToAppend);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java
new file mode 100644
index 000000000..2a7068365
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Given a field, checks that no Union fields are present.
+ *
+ * This is intended to be used to prevent unions from being read/written with V4 metadata.
+ */
+public final class MetadataV4UnionChecker {
+ static boolean isUnion(Field field) {
+ return field.getType().getTypeID() == ArrowType.ArrowTypeID.Union;
+ }
+
+ static Field check(Field field) {
+ if (isUnion(field)) {
+ return field;
+ }
+ // Naive recursive DFS
+ for (final Field child : field.getChildren()) {
+ final Field result = check(child);
+ if (result != null) {
+ return result;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Check the schema, raising an error if an unsupported feature is used (e.g. unions with < V5 metadata).
+ */
+ public static void checkForUnion(Iterator<Field> fields, MetadataVersion metadataVersion) {
+ if (metadataVersion.toFlatbufID() >= MetadataVersion.V5.toFlatbufID()) {
+ return;
+ }
+ while (fields.hasNext()) {
+ Field union = check(fields.next());
+ if (union != null) {
+ throw new IllegalArgumentException(
+ "Cannot write union with V4 metadata version, use V5 instead. Found field: " + union);
+ }
+ }
+ }
+
+ /**
+ * Check the schema, raising an error if an unsupported feature is used (e.g. unions with < V5 metadata).
+ */
+ public static void checkRead(Schema schema, MetadataVersion metadataVersion) throws IOException {
+ if (metadataVersion.toFlatbufID() >= MetadataVersion.V5.toFlatbufID()) {
+ return;
+ }
+ for (final Field field : schema.getFields()) {
+ Field union = check(field);
+ if (union != null) {
+ throw new IOException("Cannot read union with V4 metadata version. Found field: " + union);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java
new file mode 100644
index 000000000..e1b60e926
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+/**
+ * Utilities for vector validation.
+ */
+public class ValidateUtil {
+
+ private ValidateUtil() {
+ }
+
+ /**
+ * Validate the expression.
+ * @param expression the expression to validate.
+ * @param errorMessage the error message.
+ * @throws ValidateException if the expression evaluates to false.
+ */
+ public static void validateOrThrow(boolean expression, String errorMessage) {
+ if (!expression) {
+ throw new ValidateException(errorMessage);
+ }
+ }
+
+ /**
+ * Validate the expression.
+ * @param expression the expression to validate.
+ * @param errorMessage the error message template.
+ * @param args the error message arguments.
+ * @throws ValidateException if the expression evaluates to false.
+ */
+ public static void validateOrThrow(boolean expression, String errorMessage, Object... args) {
+ if (!expression) {
+ throw new ValidateException(String.format(errorMessage, args));
+ }
+ }
+
+ /**
+ * A exception that is thrown when the vector validation fails.
+ */
+ public static class ValidateException extends RuntimeException {
+ public ValidateException(String message) {
+ super(message);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
new file mode 100644
index 000000000..d4abaa194
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TypeLayout;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+/**
+ * Visitor to validate vector buffers.
+ */
+public class ValidateVectorBufferVisitor implements VectorVisitor<Void, Void> {
+
+ private void validateVectorCommon(ValueVector vector) {
+ ArrowType arrowType = vector.getField().getType();
+ validateOrThrow(vector.getValueCount() >= 0,
+ "Vector valueCount %s is negative.", vector.getValueCapacity());
+
+ if (vector instanceof FieldVector) {
+ FieldVector fieldVector = (FieldVector) vector;
+ int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType);
+ validateOrThrow(fieldVector.getFieldBuffers().size() == typeBufferCount,
+ "Expected %s buffers in vector of type %s, got %s.",
+ typeBufferCount, vector.getField().getType().toString(), fieldVector.getFieldBuffers().size());
+ }
+ }
+
+ private void validateValidityBuffer(ValueVector vector, int valueCount) {
+ ArrowBuf validityBuffer = vector.getValidityBuffer();
+ validateOrThrow(validityBuffer != null, "The validity buffer is null.");
+ validateOrThrow(validityBuffer.capacity() * 8 >= valueCount,
+ "Not enough capacity for the validity buffer. Minimum capacity %s, actual capacity %s.",
+ (valueCount + 7) / 8, validityBuffer.capacity());
+ }
+
+ private void validateOffsetBuffer(ValueVector vector, long minCapacity) {
+ ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+ validateOrThrow(offsetBuffer != null, "The offset buffer is null.");
+ validateOrThrow(offsetBuffer.capacity() >= minCapacity,
+ "Not enough capacity for the offset buffer. Minimum capacity %s, actual capacity %s.",
+ minCapacity, offsetBuffer.capacity());
+ }
+
+ private void validateFixedWidthDataBuffer(ValueVector vector, int valueCount, int bitWidth) {
+ ArrowBuf dataBuffer = vector.getDataBuffer();
+ validateOrThrow(dataBuffer != null, "The fixed width data buffer is null.");
+ validateOrThrow((long) bitWidth * valueCount <= dataBuffer.capacity() * 8L,
+ "Not enough capacity for fixed width data buffer. Minimum capacity %s, actual capacity %s.",
+ ((long) bitWidth * valueCount + 7L) / 8L, dataBuffer.capacity());
+ }
+
+ private void validateDataBuffer(ValueVector vector, long minCapacity) {
+ ArrowBuf dataBuffer = vector.getDataBuffer();
+ validateOrThrow(dataBuffer != null, "The data buffer is null.");
+ validateOrThrow(dataBuffer.capacity() >= minCapacity,
+ "Not enough capacity for data buffer. Minimum capacity %s, actual capacity %s.",
+ minCapacity, dataBuffer.capacity());
+ }
+
+ private void validateTypeBuffer(ArrowBuf typeBuf, long minCapacity) {
+ validateOrThrow(typeBuf != null, "The type buffer is null.");
+ validateOrThrow(typeBuf.capacity() >= minCapacity,
+ "Not enough capacity for type buffer. Minimum capacity %s, actual capacity %s.",
+ minCapacity, typeBuf.capacity());
+ }
+
+ @Override
+ public Void visit(BaseFixedWidthVector vector, Void value) {
+ int bitWidth = (vector instanceof BitVector) ? 1 : vector.getTypeWidth() * 8;
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ validateFixedWidthDataBuffer(vector, valueCount, bitWidth);
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseVariableWidthVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ long minOffsetCapacity = valueCount == 0 ? 0L : (long) (valueCount + 1) * BaseVariableWidthVector.OFFSET_WIDTH;
+ validateOffsetBuffer(vector, minOffsetCapacity);
+ int lastOffset = valueCount == 0 ? 0 :
+ vector.getOffsetBuffer().getInt(valueCount * BaseVariableWidthVector.OFFSET_WIDTH);
+ validateDataBuffer(vector, lastOffset);
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseLargeVariableWidthVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ long minOffsetCapacity = valueCount == 0 ? 0L
+ : (long) (valueCount + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH;
+ validateOffsetBuffer(vector, minOffsetCapacity);
+ long lastOffset = valueCount == 0 ? 0L :
+ vector.getOffsetBuffer().getLong((long) valueCount * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ validateDataBuffer(vector, lastOffset);
+ return null;
+ }
+
+ @Override
+ public Void visit(ListVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ long minOffsetCapacity = valueCount == 0 ? 0L : (long) (valueCount + 1) * ListVector.OFFSET_WIDTH;
+ validateOffsetBuffer(vector, minOffsetCapacity);
+
+ FieldVector dataVector = vector.getDataVector();
+ int lastOffset = valueCount == 0 ? 0 :
+ vector.getOffsetBuffer().getInt(valueCount * BaseVariableWidthVector.OFFSET_WIDTH);
+ int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
+ validateOrThrow(dataVectorLength >= lastOffset,
+ "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s",
+ lastOffset + 1, dataVectorLength);
+
+ if (dataVector != null) {
+ dataVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(FixedSizeListVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ FieldVector dataVector = vector.getDataVector();
+ int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
+ validateOrThrow(dataVectorLength >= valueCount * vector.getListSize(),
+ "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s.",
+ valueCount * vector.getListSize(), dataVectorLength);
+ if (dataVector != null) {
+ dataVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(LargeListVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ long minOffsetCapacity = valueCount == 0 ? 0L : (long) (valueCount + 1) * LargeListVector.OFFSET_WIDTH;
+ validateOffsetBuffer(vector, minOffsetCapacity);
+
+ FieldVector dataVector = vector.getDataVector();
+ long lastOffset = valueCount == 0 ? 0 :
+ vector.getOffsetBuffer().getLong(valueCount * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
+ validateOrThrow(dataVectorLength >= lastOffset,
+ "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s",
+ lastOffset + 1, dataVectorLength);
+
+ if (dataVector != null) {
+ dataVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NonNullableStructVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ validateOrThrow(valueCount == subVector.getValueCount(),
+ "Struct vector length not equal to child vector length. Struct vector length %s, child vector length %s",
+ valueCount, subVector.getValueCount());
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(UnionVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateTypeBuffer(vector.getTypeBuffer(), valueCount * UnionVector.TYPE_WIDTH);
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ validateOrThrow(valueCount == subVector.getValueCount(),
+ "Union vector length not equal to child vector length. Union vector length %s, child vector length %s",
+ valueCount, subVector.getValueCount());
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(DenseUnionVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateOffsetBuffer(vector, (long) valueCount * DenseUnionVector.OFFSET_WIDTH);
+ validateTypeBuffer(vector.getTypeBuffer(), valueCount * DenseUnionVector.TYPE_WIDTH);
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NullVector vector, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(ExtensionTypeVector<?> vector, Void value) {
+ vector.getUnderlyingVector().accept(this, value);
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
new file mode 100644
index 000000000..cdeb4f1ea
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+
+/**
+ * Utility for validating vector data.
+ */
+public class ValidateVectorDataVisitor implements VectorVisitor<Void, Void> {
+
+ private void validateOffsetBuffer(ValueVector vector, int valueCount) {
+ if (valueCount == 0) {
+ return;
+ }
+ ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+
+ // verify that the values in the offset buffer is non-decreasing
+ int prevValue = offsetBuffer.getInt(0);
+ for (int i = 1; i <= valueCount; i++) {
+ int curValue = offsetBuffer.getInt(i * 4);
+ validateOrThrow(curValue >= 0, "The value at position %s of the offset buffer is negative: %s.", i, curValue);
+ validateOrThrow(curValue >= prevValue,
+ "The values in positions %s and %s of the offset buffer are decreasing: %s, %s.",
+ i - 1, i, prevValue, curValue);
+ prevValue = curValue;
+ }
+ }
+
+ private void validateLargeOffsetBuffer(ValueVector vector, int valueCount) {
+ if (valueCount == 0) {
+ return;
+ }
+ ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+
+ // verify that the values in the large offset buffer is non-decreasing
+ long prevValue = offsetBuffer.getLong(0);
+ for (int i = 1; i <= valueCount; i++) {
+ long curValue = offsetBuffer.getLong((long) i * 8);
+ validateOrThrow(curValue >= 0L, "The value at position %s of the large offset buffer is negative: %s.",
+ i, curValue);
+ validateOrThrow(curValue >= prevValue,
+ "The values in positions %s and %s of the large offset buffer are decreasing: %s, %s.",
+ i - 1, i, prevValue, curValue);
+ prevValue = curValue;
+ }
+ }
+
+ private void validateTypeBuffer(ArrowBuf typeBuf, int valueCount) {
+ for (int i = 0; i < valueCount; i++) {
+ validateOrThrow(typeBuf.getByte(i) >= 0, "The type id at position %s is negative: %s.",
+ i, typeBuf.getByte(i));
+ }
+ }
+
+ @Override
+ public Void visit(BaseFixedWidthVector vector, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseVariableWidthVector vector, Void value) {
+ validateOffsetBuffer(vector, vector.getValueCount());
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseLargeVariableWidthVector vector, Void value) {
+ validateLargeOffsetBuffer(vector, vector.getValueCount());
+ return null;
+ }
+
+ @Override
+ public Void visit(ListVector vector, Void value) {
+ validateOffsetBuffer(vector, vector.getValueCount());
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(FixedSizeListVector vector, Void value) {
+ validateOffsetBuffer(vector, vector.getValueCount());
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(LargeListVector vector, Void value) {
+ validateLargeOffsetBuffer(vector, vector.getValueCount());
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NonNullableStructVector vector, Void value) {
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(UnionVector vector, Void value) {
+ validateTypeBuffer(vector.getTypeBuffer(), vector.getValueCount());
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(DenseUnionVector vector, Void value) {
+ validateTypeBuffer(vector.getTypeBuffer(), vector.getValueCount());
+
+ // validate offset buffer
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ int offset = vector.getOffset(i);
+ byte typeId = vector.getTypeId(i);
+ ValueVector subVector = vector.getVectorByType(typeId);
+ validateOrThrow(offset < subVector.getValueCount(),
+ "Dense union vector offset exceeds sub-vector boundary. Vector offset %s, sub vector size %s",
+ offset, subVector.getValueCount());
+ }
+
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NullVector vector, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(ExtensionTypeVector<?> vector, Void value) {
+ vector.getUnderlyingVector().accept(this, value);
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
new file mode 100644
index 000000000..65795b468
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
@@ -0,0 +1,378 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Utility to validate vector type information.
+ */
+public class ValidateVectorTypeVisitor implements VectorVisitor<Void, Void> {
+
+ private void validateVectorCommon(ValueVector vector, Class<? extends ArrowType> expectedArrowType) {
+ validateOrThrow(vector.getField() != null, "Vector field is empty.");
+ validateOrThrow(vector.getField().getFieldType() != null, "Vector field type is empty.");
+ ArrowType arrowType = vector.getField().getFieldType().getType();
+ validateOrThrow(arrowType != null, "Vector arrow type is empty.");
+ validateOrThrow(expectedArrowType == arrowType.getClass(),
+ "Incorrect arrow type for " + vector.getClass() + " : " + arrowType.toString());
+ }
+
+ private void validateIntVector(ValueVector vector, int expectedWidth, boolean expectedSigned) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Int,
+ "Vector %s is not an integer vector.", vector.getClass());
+ ArrowType.Int intType = (ArrowType.Int) vector.getField().getFieldType().getType();
+ validateOrThrow(intType.getIsSigned() == expectedSigned,
+ "Expecting bit width %s, actual width %s.", expectedWidth, intType.getBitWidth());
+ validateOrThrow(intType.getBitWidth() == expectedWidth, "Expecting bit width %s, actual bit width %s.",
+ expectedWidth, intType.getBitWidth());
+ }
+
+ private void validateFloatingPointVector(ValueVector vector, FloatingPointPrecision expectedPrecision) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.FloatingPoint,
+ "Vector %s is not a floating point vector.", vector.getClass());
+ ArrowType.FloatingPoint floatType = (ArrowType.FloatingPoint) vector.getField().getFieldType().getType();
+ validateOrThrow(floatType.getPrecision() == expectedPrecision, "Expecting precision %s, actual precision %s.",
+ expectedPrecision, floatType.getPrecision());
+ }
+
+ private void validateDateVector(ValueVector vector, DateUnit expectedDateUnit) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Date,
+ "Vector %s is not a date vector", vector.getClass());
+ ArrowType.Date dateType = (ArrowType.Date) vector.getField().getFieldType().getType();
+ validateOrThrow(dateType.getUnit() == expectedDateUnit,
+ "Expecting date unit %s, actual date unit %s.", expectedDateUnit, dateType.getUnit());
+ }
+
+ private void validateTimeVector(ValueVector vector, TimeUnit expectedTimeUnit, int expectedBitWidth) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Time,
+ "Vector %s is not a time vector.", vector.getClass());
+ ArrowType.Time timeType = (ArrowType.Time) vector.getField().getFieldType().getType();
+ validateOrThrow(timeType.getUnit() == expectedTimeUnit,
+ "Expecting time unit %s, actual time unit %s.", expectedTimeUnit, timeType.getUnit());
+ validateOrThrow(timeType.getBitWidth() == expectedBitWidth,
+ "Expecting bit width %s, actual bit width %s.", expectedBitWidth, timeType.getBitWidth());
+ }
+
+ private void validateIntervalVector(ValueVector vector, IntervalUnit expectedIntervalUnit) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Interval,
+ "Vector %s is not an interval vector.", vector.getClass());
+ ArrowType.Interval intervalType = (ArrowType.Interval) vector.getField().getFieldType().getType();
+ validateOrThrow(intervalType.getUnit() == expectedIntervalUnit,
+ "Expecting interval unit %s, actual date unit %s.", expectedIntervalUnit, intervalType.getUnit());
+ }
+
+ private void validateTimeStampVector(ValueVector vector, TimeUnit expectedTimeUnit, boolean expectTZ) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Timestamp,
+ "Vector %s is not a time stamp vector.", vector.getClass());
+ ArrowType.Timestamp timestampType = (ArrowType.Timestamp) vector.getField().getFieldType().getType();
+ validateOrThrow(timestampType.getUnit() == expectedTimeUnit,
+ "Expecting time stamp unit %s, actual time stamp unit %s.", expectedTimeUnit, timestampType.getUnit());
+ if (expectTZ) {
+ validateOrThrow(timestampType.getTimezone() != null, "The time zone should not be null");
+ } else {
+ validateOrThrow(timestampType.getTimezone() == null, "The time zone should be null");
+ }
+ }
+
+ private void validateExtensionTypeVector(ExtensionTypeVector<?> vector) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.ExtensionType,
+ "Vector %s is not an extension type vector.", vector.getClass());
+ validateOrThrow(vector.getField().getMetadata().containsKey(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME),
+ "Field %s does not have proper extension type metadata: %s",
+ vector.getField().getName(),
+ vector.getField().getMetadata());
+ // Validate the storage vector type
+ vector.getUnderlyingVector().accept(this, null);
+ }
+
+ @Override
+ public Void visit(BaseFixedWidthVector vector, Void value) {
+ if (vector instanceof TinyIntVector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 8, true);
+ } else if (vector instanceof SmallIntVector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 16, true);
+ } else if (vector instanceof IntVector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 32, true);
+ } else if (vector instanceof BigIntVector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 64, true);
+ } else if (vector instanceof UInt1Vector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 8, false);
+ } else if (vector instanceof UInt2Vector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 16, false);
+ } else if (vector instanceof UInt4Vector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 32, false);
+ } else if (vector instanceof UInt8Vector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 64, false);
+ } else if (vector instanceof BitVector) {
+ validateVectorCommon(vector, ArrowType.Bool.class);
+ } else if (vector instanceof DecimalVector || vector instanceof Decimal256Vector) {
+ validateVectorCommon(vector, ArrowType.Decimal.class);
+ ArrowType.Decimal arrowType = (ArrowType.Decimal) vector.getField().getType();
+ validateOrThrow(arrowType.getScale() > 0, "The scale of decimal %s is not positive.", arrowType.getScale());
+ validateOrThrow(arrowType.getPrecision() > 0, "The precision of decimal %S is not positive.",
+ arrowType.getPrecision());
+ } else if (vector instanceof DateDayVector) {
+ validateVectorCommon(vector, ArrowType.Date.class);
+ validateDateVector(vector, DateUnit.DAY);
+ } else if (vector instanceof DateMilliVector) {
+ validateVectorCommon(vector, ArrowType.Date.class);
+ validateDateVector(vector, DateUnit.MILLISECOND);
+ } else if (vector instanceof DurationVector) {
+ validateVectorCommon(vector, ArrowType.Duration.class);
+ ArrowType.Duration arrowType = (ArrowType.Duration) vector.getField().getType();
+ validateOrThrow(((DurationVector) vector).getUnit() == arrowType.getUnit(),
+ "Different duration time unit for vector and arrow type. Vector time unit %s, type time unit %s.",
+ ((DurationVector) vector).getUnit(), arrowType.getUnit());
+ } else if (vector instanceof Float4Vector) {
+ validateVectorCommon(vector, ArrowType.FloatingPoint.class);
+ validateFloatingPointVector(vector, FloatingPointPrecision.SINGLE);
+ } else if (vector instanceof Float8Vector) {
+ validateVectorCommon(vector, ArrowType.FloatingPoint.class);
+ validateFloatingPointVector(vector, FloatingPointPrecision.DOUBLE);
+ } else if (vector instanceof IntervalDayVector) {
+ validateVectorCommon(vector, ArrowType.Interval.class);
+ validateIntervalVector(vector, IntervalUnit.DAY_TIME);
+ } else if (vector instanceof IntervalMonthDayNanoVector) {
+ validateVectorCommon(vector, ArrowType.Interval.class);
+ validateIntervalVector(vector, IntervalUnit.MONTH_DAY_NANO);
+ } else if (vector instanceof IntervalYearVector) {
+ validateVectorCommon(vector, ArrowType.Interval.class);
+ validateIntervalVector(vector, IntervalUnit.YEAR_MONTH);
+ } else if (vector instanceof TimeMicroVector) {
+ validateVectorCommon(vector, ArrowType.Time.class);
+ validateTimeVector(vector, TimeUnit.MICROSECOND, 64);
+ } else if (vector instanceof TimeMilliVector) {
+ validateVectorCommon(vector, ArrowType.Time.class);
+ validateTimeVector(vector, TimeUnit.MILLISECOND, 32);
+ } else if (vector instanceof TimeNanoVector) {
+ validateVectorCommon(vector, ArrowType.Time.class);
+ validateTimeVector(vector, TimeUnit.NANOSECOND, 64);
+ } else if (vector instanceof TimeSecVector) {
+ validateVectorCommon(vector, ArrowType.Time.class);
+ validateTimeVector(vector, TimeUnit.SECOND, 32);
+ } else if (vector instanceof TimeStampMicroTZVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.MICROSECOND, true);
+ } else if (vector instanceof TimeStampMicroVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.MICROSECOND, false);
+ } else if (vector instanceof TimeStampMilliTZVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.MILLISECOND, true);
+ } else if (vector instanceof TimeStampMilliVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.MILLISECOND, false);
+ } else if (vector instanceof TimeStampNanoTZVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.NANOSECOND, true);
+ } else if (vector instanceof TimeStampNanoVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.NANOSECOND, false);
+ } else if (vector instanceof TimeStampSecTZVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.SECOND, true);
+ } else if (vector instanceof TimeStampSecVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.SECOND, false);
+ } else if (vector instanceof FixedSizeBinaryVector) {
+ validateVectorCommon(vector, ArrowType.FixedSizeBinary.class);
+ ArrowType.FixedSizeBinary arrowType = (ArrowType.FixedSizeBinary) vector.getField().getType();
+ validateOrThrow(arrowType.getByteWidth() > 0, "The byte width of a FixedSizeBinaryVector %s is not positive.",
+ arrowType.getByteWidth());
+ validateOrThrow(arrowType.getByteWidth() == vector.getTypeWidth(),
+ "Type width mismatch for FixedSizeBinaryVector. Vector type width %s, arrow type type width %s.",
+ vector.getTypeWidth(), arrowType.getByteWidth());
+ } else {
+ throw new IllegalArgumentException("Unknown type for fixed width vector " + vector.getClass());
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseVariableWidthVector vector, Void value) {
+ if (vector instanceof VarCharVector) {
+ validateVectorCommon(vector, ArrowType.Utf8.class);
+ } else if (vector instanceof VarBinaryVector) {
+ validateVectorCommon(vector, ArrowType.Binary.class);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseLargeVariableWidthVector vector, Void value) {
+ if (vector instanceof LargeVarCharVector) {
+ validateVectorCommon(vector, ArrowType.LargeUtf8.class);
+ } else if (vector instanceof LargeVarBinaryVector) {
+ validateVectorCommon(vector, ArrowType.LargeBinary.class);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(ListVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.List.class);
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(FixedSizeListVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.FixedSizeList.class);
+ ArrowType.FixedSizeList arrowType = (ArrowType.FixedSizeList) vector.getField().getType();
+ validateOrThrow(arrowType.getListSize() == vector.getListSize(),
+ "Inconsistent list size for FixedSizeListVector. Vector list size %s, arrow type list size %s.",
+ vector.getListSize(), arrowType.getListSize());
+ validateOrThrow(arrowType.getListSize() > 0, "The list size %s is not positive.", arrowType.getListSize());
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(LargeListVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.LargeList.class);
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NonNullableStructVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.Struct.class);
+ validateOrThrow(vector.getField().getChildren().size() == vector.getChildrenFromFields().size(),
+ "Child field count and child vector count mismatch. Vector child count %s, field child count %s",
+ vector.getChildrenFromFields().size(), vector.getField().getChildren().size());
+ for (int i = 0; i < vector.getChildrenFromFields().size(); i++) {
+ ValueVector subVector = vector.getChildByOrdinal(i);
+ FieldType subType = vector.getField().getChildren().get(i).getFieldType();
+
+ validateOrThrow(subType.equals(subVector.getField().getFieldType()),
+ "Struct vector's field type not equal to the child vector's field type. " +
+ "Struct field type %s, sub-vector field type %s", subType, subVector.getField().getFieldType());
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(UnionVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.Union.class);
+ ArrowType.Union arrowType = (ArrowType.Union) vector.getField().getType();
+ validateOrThrow(arrowType.getMode() == UnionMode.Sparse, "The union mode of UnionVector must be sparse");
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(DenseUnionVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.Union.class);
+ ArrowType.Union arrowType = (ArrowType.Union) vector.getField().getType();
+ validateOrThrow(arrowType.getMode() == UnionMode.Dense, "The union mode of DenseUnionVector must be dense");
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NullVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.Null.class);
+ return null;
+ }
+
+ @Override
+ public Void visit(ExtensionTypeVector<?> vector, Void value) {
+ validateExtensionTypeVector(vector);
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
new file mode 100644
index 000000000..7e99b1f90
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * Visitor to validate vector (without validating data).
+ * This visitor could be used for {@link ValueVector#accept(VectorVisitor, Object)} API,
+ * and also users could simply use {@link ValueVectorUtility#validate(ValueVector)}.
+ */
+public class ValidateVectorVisitor implements VectorVisitor<Void, Void> {
+
+ @Override
+ public Void visit(BaseFixedWidthVector vector, Void value) {
+ if (vector.getValueCount() > 0) {
+ if (vector.getDataBuffer() == null || vector.getDataBuffer().capacity() == 0) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseVariableWidthVector vector, Void value) {
+
+ if (vector.getValueCount() > 0) {
+ if (vector.getDataBuffer() == null || vector.getDataBuffer().capacity() == 0) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ int minBufferSize = (vector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH;
+
+ if (offsetBuf.capacity() < minBufferSize) {
+ throw new IllegalArgumentException(String.format("offsetBuffer too small in vector of type %s" +
+ " and valueCount %s : expected at least %s byte(s), got %s",
+ vector.getField().getType().toString(),
+ vector.getValueCount(), minBufferSize, offsetBuf.capacity()));
+ }
+
+ int firstOffset = vector.getOffsetBuffer().getInt(0);
+ int lastOffset = vector.getOffsetBuffer().getInt(vector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+
+ if (firstOffset < 0 || lastOffset < 0) {
+ throw new IllegalArgumentException("Negative offsets in vector");
+ }
+
+ int dataExtent = lastOffset - firstOffset;
+
+ if (dataExtent > 0 && (vector.getDataBuffer().capacity() == 0)) {
+ throw new IllegalArgumentException("dataBuffer capacity is 0");
+ }
+
+ if (dataExtent > vector.getDataBuffer().capacity()) {
+ throw new IllegalArgumentException(String.format("Length spanned by offsets %s larger than" +
+ " dataBuffer capacity %s", dataExtent, vector.getValueCount()));
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseLargeVariableWidthVector left, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(ListVector vector, Void value) {
+
+ FieldVector dataVector = vector.getDataVector();
+
+ if (vector.getValueCount() > 0) {
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ int minBufferSize = (vector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH;
+
+ if (offsetBuf.capacity() < minBufferSize) {
+ throw new IllegalArgumentException(String.format("offsetBuffer too small in vector of type %s" +
+ " and valueCount %s : expected at least %s byte(s), got %s",
+ vector.getField().getType().toString(),
+ vector.getValueCount(), minBufferSize, offsetBuf.capacity()));
+ }
+
+ int firstOffset = vector.getOffsetBuffer().getInt(0);
+ int lastOffset = vector.getOffsetBuffer().getInt(vector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+
+ if (firstOffset < 0 || lastOffset < 0) {
+ throw new IllegalArgumentException("Negative offsets in list vector");
+ }
+
+ int dataExtent = lastOffset - firstOffset;
+
+ if (dataExtent > 0 && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+
+ if (dataExtent > dataVector.getValueCount()) {
+ throw new IllegalArgumentException(String.format("Length spanned by list offsets (%s) larger than" +
+ " data vector valueCount (length %s)", dataExtent, dataVector.getValueCount()));
+ }
+ }
+
+ return dataVector.accept(this, null);
+ }
+
+ @Override
+ public Void visit(LargeListVector vector, Void value) {
+
+ FieldVector dataVector = vector.getDataVector();
+
+ if (vector.getValueCount() > 0) {
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ long minBufferSize = (vector.getValueCount() + 1) * LargeListVector.OFFSET_WIDTH;
+
+ if (offsetBuf.capacity() < minBufferSize) {
+ throw new IllegalArgumentException(String.format("offsetBuffer too small in vector of type %s" +
+ " and valueCount %s : expected at least %s byte(s), got %s",
+ vector.getField().getType().toString(),
+ vector.getValueCount(), minBufferSize, offsetBuf.capacity()));
+ }
+
+ long firstOffset = vector.getOffsetBuffer().getLong(0);
+ long lastOffset = vector.getOffsetBuffer().getLong(vector.getValueCount() * LargeListVector.OFFSET_WIDTH);
+
+ if (firstOffset < 0 || lastOffset < 0) {
+ throw new IllegalArgumentException("Negative offsets in list vector");
+ }
+
+ long dataExtent = lastOffset - firstOffset;
+
+ if (dataExtent > 0 && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+
+ if (dataExtent > dataVector.getValueCount()) {
+ throw new IllegalArgumentException(String.format("Length spanned by list offsets (%s) larger than" +
+ " data vector valueCount (length %s)", dataExtent, dataVector.getValueCount()));
+ }
+ }
+
+ return dataVector.accept(this, null);
+ }
+
+ @Override
+ public Void visit(FixedSizeListVector vector, Void value) {
+
+ FieldVector dataVector = vector.getDataVector();
+ int valueCount = vector.getValueCount();
+ int listSize = vector.getListSize();
+
+ if (valueCount > 0 && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+
+ if (valueCount * listSize != dataVector.getValueCount()) {
+ throw new IllegalArgumentException(String.format("data vector valueCount invalid, expect %s, " +
+ "actual is: %s", valueCount * listSize, dataVector.getValueCount()));
+ }
+
+ return null;
+ }
+
+ @Override
+ public Void visit(NonNullableStructVector vector, Void value) {
+
+ List<Field> childFields = vector.getField().getChildren();
+ final int valueCount = vector.getValueCount();
+
+ for (int i = 0; i < childFields.size(); i++) {
+ FieldVector child = vector.getChildrenFromFields().get(i);
+
+ if (child.getValueCount() != valueCount) {
+ throw new IllegalArgumentException(String.format("struct child vector #%s valueCount is not equals with " +
+ "struct vector, expect %s, actual %s", i, vector.getValueCount(), child.getValueCount()));
+ }
+
+ if (!childFields.get(i).getType().equals(child.getField().getType())) {
+ throw new IllegalArgumentException(String.format("struct child vector #%s does not match type: %s vs %s",
+ i, childFields.get(i).getType().toString(), child.getField().getType().toString()));
+ }
+
+ child.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(UnionVector vector, Void value) {
+
+ List<Field> childFields = vector.getField().getChildren();
+ final int valueCount = vector.getValueCount();
+
+ for (int i = 0; i < childFields.size(); i++) {
+ FieldVector child = vector.getChildrenFromFields().get(i);
+
+ if (child.getValueCount() != valueCount) {
+ throw new IllegalArgumentException(String.format("union child vector #%s valueCount is not equals with union" +
+ " vector, expect %s, actual %s", i, vector.getValueCount(), child.getValueCount()));
+ }
+
+ if (!childFields.get(i).getType().equals(child.getField().getType())) {
+ throw new IllegalArgumentException(String.format("union child vector #%s does not match type: %s vs %s",
+ i, childFields.get(i).getType().toString(), child.getField().getType().toString()));
+ }
+
+ child.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(DenseUnionVector vector, Void value) {
+
+ List<Field> childFields = vector.getField().getChildren();
+ for (int i = 0; i < childFields.size(); i++) {
+ FieldVector child = vector.getChildrenFromFields().get(i);
+
+ if (!childFields.get(i).getType().equals(child.getField().getType())) {
+ throw new IllegalArgumentException(String.format("union child vector #%s does not match type: %s vs %s",
+ i, childFields.get(i).getType().toString(), child.getField().getType().toString()));
+ }
+
+ child.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NullVector vector, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(ExtensionTypeVector<?> vector, Void value) {
+ vector.getUnderlyingVector().accept(this, value);
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/util/TestSchemaUtil.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/util/TestSchemaUtil.java
new file mode 100644
index 000000000..cefff8382
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/util/TestSchemaUtil.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.util;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.SchemaUtility;
+import org.junit.Test;
+
+public class TestSchemaUtil {
+
+ private static Field field(String name, boolean nullable, ArrowType type, Field... children) {
+ return new Field(name, new FieldType(nullable, type, null, null), asList(children));
+ }
+
+ @Test
+ public void testSerializationAndDeserialization() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", false, new ArrowType.Null()),
+ field("b", true, new ArrowType.Utf8()),
+ field("c", true, new ArrowType.Binary()))
+ );
+
+ byte[] serialized = SchemaUtility.serialize(schema);
+ Schema deserialized = SchemaUtility.deserialize(serialized, new RootAllocator(Long.MAX_VALUE));
+ assertEquals(schema, deserialized);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java
new file mode 100644
index 000000000..27b8ac752
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferManager;
+import org.apache.arrow.memory.RootAllocator;
+
+/**
+ * Root allocator that returns buffers pre-filled with a given value.<br>
+ * Useful for testing if value vectors are properly zeroing their buffers.
+ */
+public class DirtyRootAllocator extends RootAllocator {
+
+ private final byte fillValue;
+
+ public DirtyRootAllocator(final long limit, final byte fillValue) {
+ super(limit);
+ this.fillValue = fillValue;
+ }
+
+ @Override
+ public ArrowBuf buffer(long size) {
+ return buffer(size, null);
+ }
+
+ @Override
+ public ArrowBuf buffer(long size, BufferManager manager) {
+ ArrowBuf buffer = super.buffer(size, manager);
+ // contaminate the buffer
+ for (int i = 0; i < buffer.capacity(); i++) {
+ buffer.setByte(i, fillValue);
+ }
+
+ return buffer;
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java
new file mode 100644
index 000000000..19648dc9e
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.math.BigDecimal;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableDecimalHolder;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Integration test for a vector with a large (more than 2GB) {@link org.apache.arrow.memory.ArrowBuf} as
+ * the data buffer.
+ * To run this test, please make sure there is at least 4GB free memory in the system.
+ */
+public class ITTestLargeVector {
+ private static final Logger logger = LoggerFactory.getLogger(ITTestLargeVector.class);
+
+ @Test
+ public void testLargeLongVector() {
+ logger.trace("Testing large big int vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / BigIntVector.TYPE_WIDTH);
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ BigIntVector largeVec = new BigIntVector("vec", allocator)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity {}", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.set(i, i * 10L);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully written {} values", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ long val = largeVec.get(i);
+ assertEquals(i * 10L, val);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully read {} values", vecLength);
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeIntVector() {
+ logger.trace("Testing large int vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / IntVector.TYPE_WIDTH);
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ IntVector largeVec = new IntVector("vec", allocator)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity {}", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.set(i, i);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully written {} values", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ long val = largeVec.get(i);
+ assertEquals(i, val);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully read {} values", vecLength);
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeDecimalVector() {
+ logger.trace("Testing large decimal vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / DecimalVector.TYPE_WIDTH);
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ DecimalVector largeVec = new DecimalVector("vec", allocator, 38, 0)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity {}", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.set(i, 0);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully written {} values", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ ArrowBuf buf = largeVec.get(i);
+ assertEquals(buf.capacity(), DecimalVector.TYPE_WIDTH);
+ assertEquals(0, buf.getLong(0));
+ assertEquals(0, buf.getLong(8));
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully read {} values", vecLength);
+
+ // try setting values with a large offset in the buffer
+ largeVec.set(vecLength - 1, 12345L);
+ assertEquals(12345L, largeVec.getObject(vecLength - 1).longValue());
+
+ NullableDecimalHolder holder = new NullableDecimalHolder();
+ holder.buffer = largeVec.valueBuffer;
+ holder.isSet = 1;
+ holder.start = (long) (vecLength - 1) * largeVec.getTypeWidth();
+ assertTrue(holder.start > Integer.MAX_VALUE);
+ largeVec.set(0, holder);
+
+ BigDecimal decimal = largeVec.getObject(0);
+ assertEquals(12345L, decimal.longValue());
+
+ logger.trace("Successfully setting values from large offsets");
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeFixedSizeBinaryVector() {
+ logger.trace("Testing large fixed size binary vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int typeWidth = 8;
+ final int vecLength = (int) (bufSize / typeWidth);
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ FixedSizeBinaryVector largeVec = new FixedSizeBinaryVector("vec", allocator, typeWidth)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity {}", vecLength);
+
+ byte[] value = new byte[] {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'};
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.set(i, value);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully written {} values", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ byte[] buf = largeVec.get(i);
+ assertEquals(typeWidth, buf.length);
+ assertArrayEquals(buf, value);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully read {} values", vecLength);
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeVarCharVector() {
+ logger.trace("Testing large var char vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / BaseVariableWidthVector.OFFSET_WIDTH);
+ final String strElement = "a";
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ VarCharVector largeVec = new VarCharVector("vec", allocator)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity " + vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.setSafe(i, strElement.getBytes());
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written " + (i + 1) + " values");
+ }
+ }
+ largeVec.setValueCount(vecLength);
+ assertTrue(largeVec.getOffsetBuffer().readableBytes() > Integer.MAX_VALUE);
+ assertTrue(largeVec.getDataBuffer().readableBytes() < Integer.MAX_VALUE);
+ logger.trace("Successfully written " + vecLength + " values");
+
+ for (int i = 0; i < vecLength; i++) {
+ byte[] val = largeVec.get(i);
+ assertEquals(strElement, new String(val));
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read " + (i + 1) + " values");
+ }
+ }
+ logger.trace("Successfully read " + vecLength + " values");
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeLargeVarCharVector() {
+ logger.trace("Testing large large var char vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ final String strElement = "9876543210";
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ LargeVarCharVector largeVec = new LargeVarCharVector("vec", allocator)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity " + vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.setSafe(i, strElement.getBytes());
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written " + (i + 1) + " values");
+ }
+ }
+ largeVec.setValueCount(vecLength);
+ assertTrue(largeVec.getOffsetBuffer().readableBytes() > Integer.MAX_VALUE);
+ assertTrue(largeVec.getDataBuffer().readableBytes() > Integer.MAX_VALUE);
+ logger.trace("Successfully written " + vecLength + " values");
+
+ for (int i = 0; i < vecLength; i++) {
+ byte[] val = largeVec.get(i);
+ assertEquals(strElement, new String(val));
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read " + (i + 1) + " values");
+ }
+ }
+ logger.trace("Successfully read " + vecLength + " values");
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
new file mode 100644
index 000000000..28d56e342
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
@@ -0,0 +1,543 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.stream.IntStream;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.hash.MurmurHasher;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestBitVector {
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testBitVectorCopyFromSafe() {
+ final int size = 20;
+ try (final BitVector src = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ final BitVector dst = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ src.allocateNew(size);
+ dst.allocateNew(10);
+
+ for (int i = 0; i < size; i++) {
+ src.set(i, i % 2);
+ }
+ src.setValueCount(size);
+
+ for (int i = 0; i < size; i++) {
+ dst.copyFromSafe(i, i, src);
+ }
+ dst.setValueCount(size);
+
+ for (int i = 0; i < size; i++) {
+ assertEquals(src.getObject(i), dst.getObject(i));
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+
+ try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
+
+ sourceVector.allocateNew(40);
+
+ /* populate the bitvector -- 010101010101010101010101..... */
+ for (int i = 0; i < 40; i++) {
+ if ((i & 1) == 1) {
+ sourceVector.set(i, 1);
+ } else {
+ sourceVector.set(i, 0);
+ }
+ }
+
+ sourceVector.setValueCount(40);
+
+ /* check the vector output */
+ for (int i = 0; i < 40; i++) {
+ int result = sourceVector.get(i);
+ if ((i & 1) == 1) {
+ assertEquals(Integer.toString(1), Integer.toString(result));
+ } else {
+ assertEquals(Integer.toString(0), Integer.toString(result));
+ }
+ }
+
+ try (final BitVector toVector = new BitVector("toVector", allocator)) {
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ /*
+ * form test cases such that we cover:
+ *
+ * (1) the start index is exactly where a particular byte starts in the source bit vector
+ * (2) the start index is randomly positioned within a byte in the source bit vector
+ * (2.1) the length is a multiple of 8
+ * (2.2) the length is not a multiple of 8
+ */
+ final int[][] transferLengths = {{0, 8}, {8, 10}, {18, 0}, {18, 8}, {26, 0}, {26, 14}};
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ int actual = toVector.get(i);
+ int expected = sourceVector.get(start + i);
+ assertEquals("different data values not expected --> sourceVector index: " + (start + i) +
+ " toVector index: " + i, expected, actual);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer1() throws Exception {
+
+ try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
+
+ sourceVector.allocateNew(8190);
+
+ /* populate the bitvector */
+ for (int i = 0; i < 8190; i++) {
+ sourceVector.set(i, 1);
+ }
+
+ sourceVector.setValueCount(8190);
+
+ /* check the vector output */
+ for (int i = 0; i < 8190; i++) {
+ int result = sourceVector.get(i);
+ assertEquals(Integer.toString(1), Integer.toString(result));
+ }
+
+ try (final BitVector toVector = new BitVector("toVector", allocator)) {
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 4095}, {4095, 4095}};
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ int actual = toVector.get(i);
+ int expected = sourceVector.get(start + i);
+ assertEquals("different data values not expected --> sourceVector index: " + (start + i) +
+ " toVector index: " + i, expected, actual);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer2() throws Exception {
+
+ try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
+
+ sourceVector.allocateNew(32);
+
+ /* populate the bitvector */
+ for (int i = 0; i < 32; i++) {
+ if ((i & 1) == 1) {
+ sourceVector.set(i, 1);
+ } else {
+ sourceVector.set(i, 0);
+ }
+ }
+
+ sourceVector.setValueCount(32);
+
+ /* check the vector output */
+ for (int i = 0; i < 32; i++) {
+ int result = sourceVector.get(i);
+ if ((i & 1) == 1) {
+ assertEquals(Integer.toString(1), Integer.toString(result));
+ } else {
+ assertEquals(Integer.toString(0), Integer.toString(result));
+ }
+ }
+
+ try (final BitVector toVector = new BitVector("toVector", allocator)) {
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{5, 22}, {5, 24}, {5, 25}, {5, 27}, {0, 31}, {5, 7}, {2, 3}};
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ int actual = toVector.get(i);
+ int expected = sourceVector.get(start + i);
+ assertEquals("different data values not expected --> sourceVector index: " + (start + i) +
+ " toVector index: " + i, expected, actual);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testReallocAfterVectorTransfer1() {
+ try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertEquals(4096, valueCapacity);
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.setToOne(i);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafeToOne(valueCapacity);
+ assertEquals(valueCapacity * 2, vector.getValueCapacity());
+
+ for (int i = valueCapacity; i < valueCapacity * 2; i++) {
+ if ((i & 1) == 1) {
+ vector.setToOne(i);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity * 2; i++) {
+ if (((i & 1) == 1) || (i == valueCapacity)) {
+ assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafeToOne(valueCapacity * 2);
+ assertEquals(valueCapacity * 4, vector.getValueCapacity());
+
+ for (int i = valueCapacity * 2; i < valueCapacity * 4; i++) {
+ if ((i & 1) == 1) {
+ vector.setToOne(i);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity * 4; i++) {
+ if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) {
+ assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* now transfer the vector */
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ final BitVector toVector = (BitVector) transferPair.getTo();
+
+ assertEquals(valueCapacity * 4, toVector.getValueCapacity());
+
+ /* realloc the toVector */
+ toVector.setSafeToOne(valueCapacity * 4);
+
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ if (i <= valueCapacity * 4) {
+ if (((i & 1) == 1) || (i == valueCapacity) ||
+ (i == valueCapacity * 2) || (i == valueCapacity * 4)) {
+ assertEquals("unexpected cleared bit at index: " + i, 1, toVector.get(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, toVector.isNull(i));
+ }
+ } else {
+ assertTrue("unexpected set bit at index: " + i, toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test
+ public void testReallocAfterVectorTransfer2() {
+ try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertEquals(4096, valueCapacity);
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, 1);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafe(valueCapacity, 1, 1);
+ assertEquals(valueCapacity * 2, vector.getValueCapacity());
+
+ for (int i = valueCapacity; i < valueCapacity * 2; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, 1);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity * 2; i++) {
+ if (((i & 1) == 1) || (i == valueCapacity)) {
+ assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafe(valueCapacity * 2, 1, 1);
+ assertEquals(valueCapacity * 4, vector.getValueCapacity());
+
+ for (int i = valueCapacity * 2; i < valueCapacity * 4; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, 1);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity * 4; i++) {
+ if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) {
+ assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* now transfer the vector */
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ final BitVector toVector = (BitVector) transferPair.getTo();
+
+ assertEquals(valueCapacity * 4, toVector.getValueCapacity());
+
+ /* realloc the toVector */
+ toVector.setSafe(valueCapacity * 4, 1, 1);
+
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ if (i <= valueCapacity * 4) {
+ if (((i & 1) == 1) || (i == valueCapacity) ||
+ (i == valueCapacity * 2) || (i == valueCapacity * 4)) {
+ assertFalse("unexpected cleared bit at index: " + i, toVector.isNull(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, toVector.isNull(i));
+ }
+ } else {
+ assertTrue("unexpected set bit at index: " + i, toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test
+ public void testBitVector() {
+ // Create a new value vector for 1024 integers
+ try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(1024);
+ vector.setValueCount(1024);
+
+ // Put and set a few values
+ vector.set(0, 1);
+ vector.set(1, 0);
+ vector.set(100, 0);
+ vector.set(1022, 1);
+
+ vector.setValueCount(1024);
+
+ assertEquals(1, vector.get(0));
+ assertEquals(0, vector.get(1));
+ assertEquals(0, vector.get(100));
+ assertEquals(1, vector.get(1022));
+
+ assertEquals(1020, vector.getNullCount());
+
+ // test setting the same value twice
+ vector.set(0, 1);
+ vector.set(0, 1);
+ vector.set(1, 0);
+ vector.set(1, 0);
+ assertEquals(1, vector.get(0));
+ assertEquals(0, vector.get(1));
+
+ // test toggling the values
+ vector.set(0, 0);
+ vector.set(1, 1);
+ assertEquals(0, vector.get(0));
+ assertEquals(1, vector.get(1));
+
+ // should not change
+ assertEquals(1020, vector.getNullCount());
+
+ // Ensure null value
+ assertTrue(vector.isNull(3));
+
+ // unset the previously set bits
+ vector.setNull(0);
+ vector.setNull(1);
+ vector.setNull(100);
+ vector.setNull(1022);
+ // this should set all the array to 0
+ assertEquals(1024, vector.getNullCount());
+
+ // set all the array to 1
+ for (int i = 0; i < 1024; ++i) {
+ assertEquals(1024 - i, vector.getNullCount());
+ vector.set(i, 1);
+ }
+
+ assertEquals(0, vector.getNullCount());
+
+ vector.allocateNew(1015);
+ vector.setValueCount(1015);
+
+ // ensure it has been zeroed
+ assertEquals(1015, vector.getNullCount());
+
+ vector.set(0, 1);
+ vector.set(1014, 1); // ensure that the last item of the last byte is allocated
+
+ assertEquals(1013, vector.getNullCount());
+
+ vector.zeroVector();
+ assertEquals(1015, vector.getNullCount());
+
+ // set all the array to 1
+ for (int i = 0; i < 1015; ++i) {
+ assertEquals(1015 - i, vector.getNullCount());
+ vector.set(i, 1);
+ }
+
+ assertEquals(0, vector.getNullCount());
+ }
+ }
+
+ @Test
+ public void testBitVectorRangeSetAllOnes() {
+ validateRange(1000, 0, 1000);
+ validateRange(1000, 0, 1);
+ validateRange(1000, 1, 2);
+ validateRange(1000, 5, 6);
+ validateRange(1000, 5, 10);
+ validateRange(1000, 5, 150);
+ validateRange(1000, 5, 27);
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++) {
+ validateRange(1000, 10 + i, 27 + j);
+ validateRange(1000, i, j);
+ }
+ }
+ }
+
+ private void validateRange(int length, int start, int count) {
+ String desc = "[" + start + ", " + (start + count) + ") ";
+ try (BitVector bitVector = new BitVector("bits", allocator)) {
+ bitVector.reset();
+ bitVector.allocateNew(length);
+ bitVector.setRangeToOne(start, count);
+ for (int i = 0; i < start; i++) {
+ Assert.assertTrue(desc + i, bitVector.isNull(i));
+ }
+ for (int i = start; i < start + count; i++) {
+ Assert.assertEquals(desc + i, 1, bitVector.get(i));
+ }
+ for (int i = start + count; i < length; i++) {
+ Assert.assertTrue(desc + i, bitVector.isNull(i));
+ }
+ }
+ }
+
+ @Test
+ public void testBitVectorHashCode() {
+ final int size = 6;
+ try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ ValueVectorDataPopulator.setVector(vector, 0, 1, null, 0, 1, null);
+
+ int[] hashCodes = new int[size];
+ IntStream.range(0, size).forEach(i -> hashCodes[i] = vector.hashCode(i));
+
+ assertTrue(hashCodes[0] == hashCodes[3]);
+ assertTrue(hashCodes[1] == hashCodes[4]);
+ assertTrue(hashCodes[2] == hashCodes[5]);
+
+ assertFalse(hashCodes[0] == hashCodes[1]);
+ assertFalse(hashCodes[0] == hashCodes[2]);
+ assertFalse(hashCodes[1] == hashCodes[2]);
+
+ MurmurHasher hasher = new MurmurHasher();
+
+ IntStream.range(0, size).forEach(i -> hashCodes[i] = vector.hashCode(i, hasher));
+
+ assertTrue(hashCodes[0] == hashCodes[3]);
+ assertTrue(hashCodes[1] == hashCodes[4]);
+ assertTrue(hashCodes[2] == hashCodes[5]);
+
+ assertFalse(hashCodes[0] == hashCodes[1]);
+ assertFalse(hashCodes[0] == hashCodes[2]);
+ assertFalse(hashCodes[1] == hashCodes[2]);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java
new file mode 100644
index 000000000..9c7e1979d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Test;
+
+import io.netty.util.internal.PlatformDependent;
+
+public class TestBitVectorHelper {
+ @Test
+ public void testGetNullCount() throws Exception {
+ try (BufferAllocator root = new RootAllocator()) {
+ // test case 1, 1 null value for 0b110
+ ArrowBuf validityBuffer = root.buffer(3);
+ // we set validity buffer to be 0b10110, but only have 3 items with 1st item is null
+ validityBuffer.setByte(0, 0b10110);
+
+ // we will only consider 0b110 here, since we only 3 items and only one is null
+ int count = BitVectorHelper.getNullCount(validityBuffer, 3);
+ assertEquals(count, 1);
+ validityBuffer.close();
+
+ // test case 2, no null value for 0xFF
+ validityBuffer = root.buffer(8);
+ validityBuffer.setByte(0, 0xFF);
+
+ count = BitVectorHelper.getNullCount(validityBuffer, 8);
+ assertEquals(count, 0);
+ validityBuffer.close();
+
+ // test case 3, 1 null value for 0x7F
+ validityBuffer = root.buffer(8);
+ validityBuffer.setByte(0, 0x7F);
+
+ count = BitVectorHelper.getNullCount(validityBuffer, 8);
+ assertEquals(count, 1);
+ validityBuffer.close();
+
+ // test case 4, validity buffer has multiple bytes, 11 items
+ validityBuffer = root.buffer(11);
+ validityBuffer.setByte(0, 0b10101010);
+ validityBuffer.setByte(1, 0b01010101);
+
+ count = BitVectorHelper.getNullCount(validityBuffer, 11);
+ assertEquals(count, 5);
+ validityBuffer.close();
+ }
+ }
+
+ @Test
+ public void testAllBitsNull() {
+ final int bufferLength = 32 * 1024;
+ try (RootAllocator allocator = new RootAllocator(bufferLength);
+ ArrowBuf validityBuffer = allocator.buffer(bufferLength)) {
+
+ validityBuffer.setZero(0, bufferLength);
+ int bitLength = 1024;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ bitLength = 1027;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1025;
+ BitVectorHelper.setBit(validityBuffer, 12);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1025;
+ BitVectorHelper.setBit(validityBuffer, 1024);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1026;
+ BitVectorHelper.setBit(validityBuffer, 1024);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1027;
+ BitVectorHelper.setBit(validityBuffer, 1025);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1031;
+ BitVectorHelper.setBit(validityBuffer, 1029);
+ BitVectorHelper.setBit(validityBuffer, 1030);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+ }
+ }
+
+ @Test
+ public void testAllBitsSet() {
+ final int bufferLength = 32 * 1024;
+ try (RootAllocator allocator = new RootAllocator(bufferLength);
+ ArrowBuf validityBuffer = allocator.buffer(bufferLength)) {
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ int bitLength = 1024;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ bitLength = 1028;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1025;
+ BitVectorHelper.unsetBit(validityBuffer, 12);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1025;
+ BitVectorHelper.unsetBit(validityBuffer, 1024);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1026;
+ BitVectorHelper.unsetBit(validityBuffer, 1024);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1027;
+ BitVectorHelper.unsetBit(validityBuffer, 1025);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1031;
+ BitVectorHelper.unsetBit(validityBuffer, 1029);
+ BitVectorHelper.unsetBit(validityBuffer, 1030);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+ }
+ }
+
+ @Test
+ public void testConcatBits() {
+ try (RootAllocator allocator = new RootAllocator(1024 * 1024)) {
+ try (ArrowBuf buf1 = allocator.buffer(1024);
+ ArrowBuf buf2 = allocator.buffer(1024);
+ ArrowBuf output = allocator.buffer(1024)) {
+
+ buf1.setZero(0, buf1.capacity());
+ buf2.setZero(0, buf2.capacity());
+
+ final int maxCount = 100;
+ for (int i = 0; i < maxCount; i++) {
+ if (i % 3 == 0) {
+ BitVectorHelper.setBit(buf1, i);
+ BitVectorHelper.setBit(buf2, i);
+ }
+ }
+
+ // test the case where the number of bits for both sets are multiples of 8.
+ concatAndVerify(buf1, 40, buf2, 48, output);
+
+ // only the number of bits in the first set is a multiple of 8
+ concatAndVerify(buf1, 32, buf2, 47, output);
+
+ // only the number of bits in the second set is a multiple of 8
+ concatAndVerify(buf1, 31, buf2, 48, output);
+
+ // neither set has a size that is a multiple of 8
+ concatAndVerify(buf1, 27, buf2, 52, output);
+
+ // the remaining bits in the second set is spread in two bytes
+ concatAndVerify(buf1, 31, buf2, 55, output);
+ }
+ }
+ }
+
+ @Test
+ public void testConcatBitsInPlace() {
+ try (RootAllocator allocator = new RootAllocator(1024 * 1024)) {
+ try (ArrowBuf buf1 = allocator.buffer(1024);
+ ArrowBuf buf2 = allocator.buffer(1024)) {
+
+ buf1.setZero(0, buf1.capacity());
+ buf2.setZero(0, buf2.capacity());
+
+ final int maxCount = 100;
+ for (int i = 0; i < maxCount; i++) {
+ if (i % 3 == 0) {
+ BitVectorHelper.setBit(buf1, i);
+ BitVectorHelper.setBit(buf2, i);
+ }
+ }
+
+ // test the case where the number of bits for both sets are multiples of 8.
+ concatAndVerify(buf1, 40, buf2, 48, buf1);
+
+ // only the number of bits in the first set is a multiple of 8
+ concatAndVerify(buf1, 32, buf2, 47, buf1);
+
+ // only the number of bits in the second set is a multiple of 8
+ concatAndVerify(buf1, 31, buf2, 48, buf1);
+
+ // neither set has a size that is a multiple of 8
+ concatAndVerify(buf1, 27, buf2, 52, buf1);
+
+ // the remaining bits in the second set is spread in two bytes
+ concatAndVerify(buf1, 31, buf2, 55, buf1);
+ }
+ }
+ }
+
+ private void concatAndVerify(ArrowBuf buf1, int count1, ArrowBuf buf2, int count2, ArrowBuf output) {
+ BitVectorHelper.concatBits(buf1, count1, buf2, count2, output);
+ int outputIdx = 0;
+ for (int i = 0; i < count1; i++, outputIdx++) {
+ assertEquals(BitVectorHelper.get(output, outputIdx), BitVectorHelper.get(buf1, i));
+ }
+ for (int i = 0; i < count2; i++, outputIdx++) {
+ assertEquals(BitVectorHelper.get(output, outputIdx), BitVectorHelper.get(buf2, i));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java
new file mode 100644
index 000000000..8efadad9b
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.junit.Test;
+
+public class TestBufferOwnershipTransfer {
+
+ @Test
+ public void testTransferFixedWidth() {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 100000, 100000);
+ BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 100000, 100000);
+
+ IntVector v1 = new IntVector("v1", childAllocator1);
+ v1.allocateNew();
+ v1.setValueCount(4095);
+ long totalAllocatedMemory = childAllocator1.getAllocatedMemory();
+
+ IntVector v2 = new IntVector("v2", childAllocator2);
+
+ v1.makeTransferPair(v2).transfer();
+
+ assertEquals(0, childAllocator1.getAllocatedMemory());
+ assertEquals(totalAllocatedMemory, childAllocator2.getAllocatedMemory());
+
+ v1.close();
+ v2.close();
+ childAllocator1.close();
+ childAllocator2.close();
+ allocator.close();
+ }
+
+ @Test
+ public void testTransferVariableWidth() {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 100000, 100000);
+ BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 100000, 100000);
+
+ VarCharVector v1 = new VarCharVector("v1", childAllocator1);
+ v1.allocateNew();
+ v1.setSafe(4094, "hello world".getBytes(), 0, 11);
+ v1.setValueCount(4001);
+
+ VarCharVector v2 = new VarCharVector("v2", childAllocator2);
+ long memoryBeforeTransfer = childAllocator1.getAllocatedMemory();
+
+ v1.makeTransferPair(v2).transfer();
+
+ assertEquals(0, childAllocator1.getAllocatedMemory());
+ assertEquals(memoryBeforeTransfer, childAllocator2.getAllocatedMemory());
+
+ v1.close();
+ v2.close();
+ childAllocator1.close();
+ childAllocator2.close();
+ allocator.close();
+ }
+
+ private static class Pointer<T> {
+ T value;
+ }
+
+ private static CallBack newTriggerCallback(final Pointer<Boolean> trigger) {
+ trigger.value = false;
+ return new CallBack() {
+ @Override
+ public void doWork() {
+ trigger.value = true;
+ }
+ };
+ }
+
+ @Test
+ public void emptyListTransferShouldNotTriggerSchemaChange() {
+ final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+
+ final Pointer<Boolean> trigger1 = new Pointer<>();
+ final Pointer<Boolean> trigger2 = new Pointer<>();
+ final ListVector v1 = new ListVector("v1", allocator,
+ FieldType.nullable(ArrowType.Null.INSTANCE),
+ newTriggerCallback(trigger1));
+ final ListVector v2 = new ListVector("v2", allocator,
+ FieldType.nullable(ArrowType.Null.INSTANCE),
+ newTriggerCallback(trigger2));
+
+ try {
+ // since we are working with empty vectors, their internal
+ // buffers will be allocator.EMPTY which use
+ // ReferenceManager.NO_OP instance and transfer() is not
+ // supported
+ v1.makeTransferPair(v2).transfer();
+ } catch (Exception e) {
+ assertTrue(e instanceof UnsupportedOperationException);
+ assertTrue(e.getMessage().contains(ReferenceManager.NO_OP_ERROR_MESSAGE));
+ }
+
+ assertFalse(trigger1.value);
+ assertFalse(trigger2.value);
+
+ v1.close();
+ v2.close();
+ allocator.close();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java
new file mode 100644
index 000000000..3786f63c3
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java
@@ -0,0 +1,1104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.math.BigDecimal;
+import java.nio.charset.Charset;
+import java.time.Duration;
+import java.time.Period;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/*
+ * Tested field types:
+ *
+ * NullableInt
+ * NullableBigInt
+ * NullableFloat4
+ * NullableFloat8
+ * NullableBit
+ * NullableDecimal
+ * NullableIntervalDay
+ * NullableIntervalYear
+ * NullableSmallInt
+ * NullableTinyInt
+ * NullableVarChar
+ * NullableTimeMicro
+ * NullableTimeMilli
+ * NullableTimeStamp*
+ */
+
+public class TestCopyFrom {
+
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test /* NullableVarChar */
+ public void testCopyFromWithNulls() {
+ try (final VarCharVector vector =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);
+ final VarCharVector vector2 =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+ vector.allocateNew();
+ assertTrue(vector.getValueCapacity() >= 1);
+ assertEquals(0, vector.getValueCount());
+ int initialCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ continue;
+ }
+ byte[] b = Integer.toString(i).getBytes();
+ vector.setSafe(i, b, 0, b.length);
+ }
+
+ /* NO reAlloc() should have happened in setSafe() */
+ int capacity = vector.getValueCapacity();
+ assertEquals(initialCapacity, capacity);
+
+ vector.setValueCount(initialCapacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector.getObject(i).toString());
+ }
+ }
+
+ vector2.setInitialCapacity(initialCapacity);
+ vector2.allocateNew();
+ capacity = vector2.getValueCapacity();
+ assertEquals(initialCapacity, capacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector);
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector2.getObject(i).toString());
+ }
+ }
+
+ /* NO reAlloc() should have happened in copyFrom */
+ capacity = vector2.getValueCapacity();
+ assertEquals(initialCapacity, capacity);
+
+ vector2.setValueCount(initialCapacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector2.getObject(i).toString());
+ }
+ }
+ }
+ }
+
+ @Test /* NullableVarChar */
+ public void testCopyFromWithNulls1() {
+ try (final VarCharVector vector =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);
+ final VarCharVector vector2 =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+ vector.allocateNew();
+ assertTrue(vector.getValueCapacity() >= 1);
+ assertEquals(0, vector.getValueCount());
+ int initialCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ continue;
+ }
+ byte[] b = Integer.toString(i).getBytes();
+ vector.setSafe(i, b, 0, b.length);
+ }
+
+ /* NO reAlloc() should have happened in setSafe() */
+ int capacity = vector.getValueCapacity();
+ assertEquals(initialCapacity, capacity);
+
+ vector.setValueCount(initialCapacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector.getObject(i).toString());
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew((initialCapacity / 4) * 10, initialCapacity / 4);
+
+ capacity = vector2.getValueCapacity();
+ assertTrue(capacity >= initialCapacity / 4);
+ assertTrue(capacity < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector);
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector2.getObject(i).toString());
+ }
+ }
+
+ /* 2 reAllocs should have happened in copyFromSafe() */
+ capacity = vector2.getValueCapacity();
+ assertTrue(capacity >= initialCapacity);
+
+ vector2.setValueCount(initialCapacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector2.getObject(i).toString());
+ }
+ }
+ }
+ }
+
+ @Test /* IntVector */
+ public void testCopyFromWithNulls2() {
+ try (final IntVector vector1 = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ final IntVector vector2 = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, 1000 + i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 1000 + i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 1000 + i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* BigIntVector */
+ public void testCopyFromWithNulls3() {
+ try (final BigIntVector vector1 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ final BigIntVector vector2 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, 10000000000L + (long) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* BitVector */
+ public void testCopyFromWithNulls4() {
+ try (final BitVector vector1 = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ final BitVector vector2 = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.setInitialCapacity(4096);
+ vector1.allocateNew();
+ assertEquals(4096, vector1.getValueCapacity());
+ assertEquals(0, vector1.getValueCount());
+
+ int counter = 0;
+ for (int i = 0; i < 4096; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ if ((counter & 1) == 0) {
+ vector1.setSafe(i, 1);
+ } else {
+ vector1.setSafe(i, 0);
+ }
+ counter++;
+ }
+
+ vector1.setValueCount(4096);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(4096, vector1.getValueCapacity());
+ assertEquals(4096, vector1.getValueCount());
+
+ counter = 0;
+ for (int i = 0; i < 4096; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ if ((counter & 1) == 0) {
+ assertTrue(vector1.getObject(i));
+ } else {
+ assertFalse(vector1.getObject(i));
+ }
+ counter++;
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(1024);
+ assertEquals(1024, vector2.getValueCapacity());
+
+ for (int i = 0; i < 4096; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertEquals(4096, vector2.getValueCapacity());
+ vector2.setValueCount(8192);
+ /* setValueCount() should have done another realloc */
+ assertEquals(8192, vector2.getValueCount());
+ assertEquals(8192, vector2.getValueCapacity());
+
+ /* check vector data after copy and realloc */
+ counter = 0;
+ for (int i = 0; i < 8192; i++) {
+ if (((i & 1) == 0) || (i >= 4096)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ if ((counter & 1) == 0) {
+ assertTrue(vector2.getObject(i));
+ } else {
+ assertFalse(vector2.getObject(i));
+ }
+ counter++;
+ }
+ }
+ }
+ }
+
+ @Test /* Float4Vector */
+ public void testCopyFromWithNulls5() {
+ try (final Float4Vector vector1 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator);
+ final Float4Vector vector2 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, 100.25f + (float) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 100.25f + (float) i, vector1.get(i), 0);
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 100.25f + i * 1.0f, vector2.get(i), 0);
+ }
+ }
+ }
+ }
+
+ @Test /* Float8Vector */
+ public void testCopyFromWithNulls6() {
+ try (final Float8Vector vector1 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator);
+ final Float8Vector vector2 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, 123456.7865 + (double) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i, 123456.7865 + (double) i, vector1.get(i), 0);
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i, 123456.7865 + (double) i, vector2.get(i), 0);
+ }
+ }
+ }
+ }
+
+ @Test /* IntervalDayVector */
+ public void testCopyFromWithNulls7() {
+ try (final IntervalDayVector vector1 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator);
+ final IntervalDayVector vector2 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final int days = 10;
+ final int milliseconds = 10000;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, days + i, milliseconds + i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ final Duration d = vector1.getObject(i);
+ assertEquals(days + i, d.toDays());
+ assertEquals(milliseconds + i, d.minusDays(days + i).toMillis());
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ final Duration d = vector2.getObject(i);
+ assertEquals(days + i, d.toDays());
+ assertEquals(milliseconds + i, d.minusDays(days + i).toMillis());
+ }
+ }
+ }
+ }
+
+ @Test /* IntervalYearVector */
+ public void testCopyFromWithNulls8() {
+ try (final IntervalYearVector vector1 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator);
+ final IntervalYearVector vector2 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final int interval = 30; /* 2 years 6 months */
+ final Period[] periods = new Period[4096];
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, interval + i);
+ final int years = (interval + i) / org.apache.arrow.vector.util.DateUtility.yearsToMonths;
+ final int months = (interval + i) % org.apache.arrow.vector.util.DateUtility.yearsToMonths;
+ periods[i] = Period.ofYears(years).plusMonths(months).normalized();
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ final Period p = vector1.getObject(i).normalized();
+ assertEquals(interval + i, vector1.get(i));
+ assertEquals(periods[i], p);
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ final Period p = vector2.getObject(i).normalized();
+ assertEquals(periods[i], p);
+ }
+ }
+ }
+ }
+
+ @Test /* SmallIntVector */
+ public void testCopyFromWithNulls9() {
+ try (final SmallIntVector vector1 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator);
+ final SmallIntVector vector2 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final short val = 1000;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val + (short) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (short) i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (short) i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* TimeMicroVector */
+ public void testCopyFromWithNulls10() {
+ try (final TimeMicroVector vector1 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator);
+ final TimeMicroVector vector2 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final long val = 100485765432L;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val + (long) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* TimeMilliVector */
+ public void testCopyFromWithNulls11() {
+ try (final TimeMilliVector vector1 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator);
+ final TimeMilliVector vector2 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final int val = 1000;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val + i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* TinyIntVector */
+ public void testCopyFromWithNulls12() {
+ try (final TinyIntVector vector1 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator);
+ final TinyIntVector vector2 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ byte val = -128;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val);
+ val++;
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ val = -128;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val, vector1.get(i));
+ val++;
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ val = -128;
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val, vector2.get(i));
+ val++;
+ }
+ }
+ }
+ }
+
+ @Test /* DecimalVector */
+ public void testCopyFromWithNulls13() {
+ try (final DecimalVector vector1 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16);
+ final DecimalVector vector2 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final double baseValue = 104567897654.876543654;
+ final BigDecimal[] decimals = new BigDecimal[4096];
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ BigDecimal decimal = new BigDecimal(baseValue + (double) i);
+ vector1.setSafe(i, decimal);
+ decimals[i] = decimal;
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ final BigDecimal decimal = vector1.getObject(i);
+ assertEquals(decimals[i], decimal);
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ final BigDecimal decimal = vector2.getObject(i);
+ assertEquals(decimals[i], decimal);
+ }
+ }
+ }
+ }
+
+ @Test /* TimeStampVector */
+ public void testCopyFromWithNulls14() {
+ try (final TimeStampVector vector1 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator);
+ final TimeStampVector vector2 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final long val = 20145678912L;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val + (long) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test //https://issues.apache.org/jira/browse/ARROW-7837
+ public void testCopySafeArrow7837() {
+ // this test exposes a bug in `handleSafe` where
+ // it reads a stale index and as a result missed a required resize of the value vector.
+ try (VarCharVector vc1 = new VarCharVector("vc1", allocator);
+ VarCharVector vc2 = new VarCharVector("vc2", allocator);
+ ) {
+ //initial size is carefully set in order to force the second 'copyFromSafe' operation
+ // to trigger a reallocation of the vector.
+ vc2.setInitialCapacity(/*valueCount*/20, /*density*/0.5);
+
+ vc1.setSafe(0, "1234567890".getBytes(Charset.forName("utf-8")));
+ assertFalse(vc1.isNull(0));
+ assertEquals(vc1.getObject(0).toString(), "1234567890");
+
+ vc2.copyFromSafe(0, 0, vc1);
+ assertFalse(vc2.isNull(0));
+ assertEquals(vc2.getObject(0).toString(), "1234567890");
+
+ vc2.copyFromSafe(0, 5, vc1);
+ assertTrue(vc2.isNull(1));
+ assertTrue(vc2.isNull(2));
+ assertTrue(vc2.isNull(3));
+ assertTrue(vc2.isNull(4));
+ assertFalse(vc2.isNull(5));
+ assertEquals(vc2.getObject(5).toString(), "1234567890");
+ }
+ }
+
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java
new file mode 100644
index 000000000..82c912cef
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDecimal256Vector {
+
+ private static long[] intValues;
+
+ static {
+ intValues = new long[60];
+ for (int i = 0; i < intValues.length / 2; i++) {
+ intValues[i] = 1 << i + 1;
+ intValues[2 * i] = -1 * (1 << i + 1);
+ }
+ }
+
+ private int scale = 3;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testValuesWriteRead() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(10, scale, 256), allocator);) {
+
+ try (Decimal256Vector oldConstructor = new Decimal256Vector("decimal", allocator, 10, scale);) {
+ assertEquals(decimalVector.getField().getType(), oldConstructor.getField().getType());
+ }
+
+ decimalVector.allocateNew();
+ BigDecimal[] values = new BigDecimal[intValues.length];
+ for (int i = 0; i < intValues.length; i++) {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(intValues[i]), scale);
+ values[i] = decimal;
+ decimalVector.setSafe(i, decimal);
+ }
+
+ decimalVector.setValueCount(intValues.length);
+
+ for (int i = 0; i < intValues.length; i++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals("unexpected data at index: " + i, values[i], value);
+ }
+ }
+ }
+
+ @Test
+ public void testDecimal256DifferentScaleAndPrecision() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(4, 2, 256), allocator)) {
+ decimalVector.allocateNew();
+
+ // test Decimal256 with different scale
+ {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(0), 3);
+ UnsupportedOperationException ue =
+ assertThrows(UnsupportedOperationException.class, () -> decimalVector.setSafe(0, decimal));
+ assertEquals("BigDecimal scale must equal that in the Arrow vector: 3 != 2", ue.getMessage());
+ }
+
+ // test BigDecimal with larger precision than initialized
+ {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(12345), 2);
+ UnsupportedOperationException ue =
+ assertThrows(UnsupportedOperationException.class, () -> decimalVector.setSafe(0, decimal));
+ assertEquals("BigDecimal precision can not be greater than that in the Arrow vector: 5 > 4", ue.getMessage());
+ }
+ }
+ }
+
+ @Test
+ public void testWriteBigEndian() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(38, 18, 256), allocator);) {
+ decimalVector.allocateNew();
+ BigDecimal decimal1 = new BigDecimal("123456789.000000000000000000");
+ BigDecimal decimal2 = new BigDecimal("11.123456789123456789");
+ BigDecimal decimal3 = new BigDecimal("1.000000000000000000");
+ BigDecimal decimal4 = new BigDecimal("0.111111111000000000");
+ BigDecimal decimal5 = new BigDecimal("987654321.123456789000000000");
+ BigDecimal decimal6 = new BigDecimal("222222222222.222222222000000000");
+ BigDecimal decimal7 = new BigDecimal("7777777777777.666666667000000000");
+ BigDecimal decimal8 = new BigDecimal("1212121212.343434343000000000");
+
+ byte[] decimalValue1 = decimal1.unscaledValue().toByteArray();
+ byte[] decimalValue2 = decimal2.unscaledValue().toByteArray();
+ byte[] decimalValue3 = decimal3.unscaledValue().toByteArray();
+ byte[] decimalValue4 = decimal4.unscaledValue().toByteArray();
+ byte[] decimalValue5 = decimal5.unscaledValue().toByteArray();
+ byte[] decimalValue6 = decimal6.unscaledValue().toByteArray();
+ byte[] decimalValue7 = decimal7.unscaledValue().toByteArray();
+ byte[] decimalValue8 = decimal8.unscaledValue().toByteArray();
+
+ decimalVector.setBigEndian(0, decimalValue1);
+ decimalVector.setBigEndian(1, decimalValue2);
+ decimalVector.setBigEndian(2, decimalValue3);
+ decimalVector.setBigEndian(3, decimalValue4);
+ decimalVector.setBigEndian(4, decimalValue5);
+ decimalVector.setBigEndian(5, decimalValue6);
+ decimalVector.setBigEndian(6, decimalValue7);
+ decimalVector.setBigEndian(7, decimalValue8);
+
+ decimalVector.setValueCount(8);
+ assertEquals(8, decimalVector.getValueCount());
+ assertEquals(decimal1, decimalVector.getObject(0));
+ assertEquals(decimal2, decimalVector.getObject(1));
+ assertEquals(decimal3, decimalVector.getObject(2));
+ assertEquals(decimal4, decimalVector.getObject(3));
+ assertEquals(decimal5, decimalVector.getObject(4));
+ assertEquals(decimal6, decimalVector.getObject(5));
+ assertEquals(decimal7, decimalVector.getObject(6));
+ assertEquals(decimal8, decimalVector.getObject(7));
+ }
+ }
+
+ @Test
+ public void testLongReadWrite() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(38, 0, 256), allocator)) {
+ decimalVector.allocateNew();
+
+ long[] longValues = {0L, -2L, Long.MAX_VALUE, Long.MIN_VALUE, 187L};
+
+ for (int i = 0; i < longValues.length; ++i) {
+ decimalVector.set(i, longValues[i]);
+ }
+
+ decimalVector.setValueCount(longValues.length);
+
+ for (int i = 0; i < longValues.length; ++i) {
+ assertEquals(new BigDecimal(longValues[i]), decimalVector.getObject(i));
+ }
+ }
+ }
+
+
+ @Test
+ public void testBigDecimalReadWrite() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(38, 9, 256), allocator);) {
+ decimalVector.allocateNew();
+ BigDecimal decimal1 = new BigDecimal("123456789.000000000");
+ BigDecimal decimal2 = new BigDecimal("11.123456789");
+ BigDecimal decimal3 = new BigDecimal("1.000000000");
+ BigDecimal decimal4 = new BigDecimal("-0.111111111");
+ BigDecimal decimal5 = new BigDecimal("-987654321.123456789");
+ BigDecimal decimal6 = new BigDecimal("-222222222222.222222222");
+ BigDecimal decimal7 = new BigDecimal("7777777777777.666666667");
+ BigDecimal decimal8 = new BigDecimal("1212121212.343434343");
+
+ decimalVector.set(0, decimal1);
+ decimalVector.set(1, decimal2);
+ decimalVector.set(2, decimal3);
+ decimalVector.set(3, decimal4);
+ decimalVector.set(4, decimal5);
+ decimalVector.set(5, decimal6);
+ decimalVector.set(6, decimal7);
+ decimalVector.set(7, decimal8);
+
+ decimalVector.setValueCount(8);
+ assertEquals(8, decimalVector.getValueCount());
+ assertEquals(decimal1, decimalVector.getObject(0));
+ assertEquals(decimal2, decimalVector.getObject(1));
+ assertEquals(decimal3, decimalVector.getObject(2));
+ assertEquals(decimal4, decimalVector.getObject(3));
+ assertEquals(decimal5, decimalVector.getObject(4));
+ assertEquals(decimal6, decimalVector.getObject(5));
+ assertEquals(decimal7, decimalVector.getObject(6));
+ assertEquals(decimal8, decimalVector.getObject(7));
+ }
+ }
+
+ /**
+ * Test {@link Decimal256Vector#setBigEndian(int, byte[])} which takes BE layout input and stores in native-endian
+ * (NE) layout.
+ * Cases to cover: input byte array in different lengths in range [1-16] and negative values.
+ */
+ @Test
+ public void decimalBE2NE() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(23, 2, 256), allocator)) {
+ decimalVector.allocateNew();
+
+ BigInteger[] testBigInts = new BigInteger[] {
+ new BigInteger("0"),
+ new BigInteger("-1"),
+ new BigInteger("23"),
+ new BigInteger("234234"),
+ new BigInteger("-234234234"),
+ new BigInteger("234234234234"),
+ new BigInteger("-56345345345345"),
+ new BigInteger("2982346298346289346293467923465345634500"), // converts to 16+ byte array
+ new BigInteger("-389457298347598237459832459823434653600"), // converts to 16+ byte array
+ new BigInteger("-345345"),
+ new BigInteger("754533")
+ };
+
+ int insertionIdx = 0;
+ insertionIdx++; // insert a null
+ for (BigInteger val : testBigInts) {
+ decimalVector.setBigEndian(insertionIdx++, val.toByteArray());
+ }
+ insertionIdx++; // insert a null
+ // insert a zero length buffer
+ decimalVector.setBigEndian(insertionIdx++, new byte[0]);
+
+ // Try inserting a buffer larger than 33 bytes and expect a failure
+ final int insertionIdxCapture = insertionIdx;
+ IllegalArgumentException ex = assertThrows(IllegalArgumentException.class,
+ () -> decimalVector.setBigEndian(insertionIdxCapture, new byte[33]));
+ assertTrue(ex.getMessage().equals("Invalid decimal value length. Valid length in [1 - 32], got 33"));
+ decimalVector.setValueCount(insertionIdx);
+
+ // retrieve values and check if they are correct
+ int outputIdx = 0;
+ assertTrue(decimalVector.isNull(outputIdx++));
+ for (BigInteger expected : testBigInts) {
+ final BigDecimal actual = decimalVector.getObject(outputIdx++);
+ assertEquals(expected, actual.unscaledValue());
+ }
+ assertTrue(decimalVector.isNull(outputIdx++));
+ assertEquals(BigInteger.valueOf(0), decimalVector.getObject(outputIdx).unscaledValue());
+ }
+ }
+
+ @Test
+ public void setUsingArrowBufOfLEInts() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(5, 2, 256), allocator);
+ ArrowBuf buf = allocator.buffer(8);) {
+ decimalVector.allocateNew();
+
+ // add a positive value equivalent to 705.32
+ int val = 70532;
+ buf.setInt(0, val);
+ decimalVector.setSafe(0, 0, buf, 4);
+
+ // add a -ve value equivalent to -705.32
+ val = -70532;
+ buf.setInt(4, val);
+ decimalVector.setSafe(1, 4, buf, 4);
+
+ decimalVector.setValueCount(2);
+
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal
+ .valueOf(-705.32)};
+ for (int i = 0; i < 2; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+
+ }
+
+ @Test
+ public void setUsingArrowLongLEBytes() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(18, 0, 256), allocator);
+ ArrowBuf buf = allocator.buffer(16);) {
+ decimalVector.allocateNew();
+
+ long val = Long.MAX_VALUE;
+ buf.setLong(0, val);
+ decimalVector.setSafe(0, 0, buf, 8);
+
+ val = Long.MIN_VALUE;
+ buf.setLong(8, val);
+ decimalVector.setSafe(1, 8, buf, 8);
+
+ decimalVector.setValueCount(2);
+
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal
+ .valueOf(Long.MIN_VALUE)};
+ for (int i = 0; i < 2; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+ }
+
+ @Test
+ public void setUsingArrowBufOfBEBytes() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(5, 2, 256), allocator);
+ ArrowBuf buf = allocator.buffer(9);) {
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal
+ .valueOf(-705.32), BigDecimal.valueOf(705.32)};
+ verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 3);
+ }
+
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(43, 2, 256), allocator);
+ ArrowBuf buf = allocator.buffer(45);) {
+ BigDecimal[] expectedValues = new BigDecimal[] {new BigDecimal("29823462983462893462934679234653450000000.63"),
+ new BigDecimal("-2982346298346289346293467923465345.63"),
+ new BigDecimal("2982346298346289346293467923465345.63")};
+ verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 15);
+ }
+ }
+
+ private void verifyWritingArrowBufWithBigEndianBytes(Decimal256Vector decimalVector,
+ ArrowBuf buf, BigDecimal[] expectedValues,
+ int length) {
+ decimalVector.allocateNew();
+ for (int i = 0; i < expectedValues.length; i++) {
+ byte[] bigEndianBytes = expectedValues[i].unscaledValue().toByteArray();
+ buf.setBytes(length * i , bigEndianBytes, 0 , bigEndianBytes.length);
+ decimalVector.setBigEndianSafe(i, length * i, buf, bigEndianBytes.length);
+ }
+
+ decimalVector.setValueCount(3);
+
+ for (int i = 0; i < expectedValues.length; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java
new file mode 100644
index 000000000..c7e3e436e
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDecimalVector {
+
+ private static long[] intValues;
+
+ static {
+ intValues = new long[60];
+ for (int i = 0; i < intValues.length / 2; i++) {
+ intValues[i] = 1 << i + 1;
+ intValues[2 * i] = -1 * (1 << i + 1);
+ }
+ }
+
+ private int scale = 3;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testValuesWriteRead() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(10, scale, 128), allocator);) {
+
+ try (DecimalVector oldConstructor = new DecimalVector("decimal", allocator, 10, scale);) {
+ assertEquals(decimalVector.getField().getType(), oldConstructor.getField().getType());
+ }
+
+ decimalVector.allocateNew();
+ BigDecimal[] values = new BigDecimal[intValues.length];
+ for (int i = 0; i < intValues.length; i++) {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(intValues[i]), scale);
+ values[i] = decimal;
+ decimalVector.setSafe(i, decimal);
+ }
+
+ decimalVector.setValueCount(intValues.length);
+
+ for (int i = 0; i < intValues.length; i++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals("unexpected data at index: " + i, values[i], value);
+ }
+ }
+ }
+
+ @Test
+ public void testBigDecimalDifferentScaleAndPrecision() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(4, 2, 128), allocator);) {
+ decimalVector.allocateNew();
+
+ // test BigDecimal with different scale
+ boolean hasError = false;
+ try {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(0), 3);
+ decimalVector.setSafe(0, decimal);
+ } catch (UnsupportedOperationException ue) {
+ hasError = true;
+ } finally {
+ assertTrue(hasError);
+ }
+
+ // test BigDecimal with larger precision than initialized
+ hasError = false;
+ try {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(12345), 2);
+ decimalVector.setSafe(0, decimal);
+ } catch (UnsupportedOperationException ue) {
+ hasError = true;
+ } finally {
+ assertTrue(hasError);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteBigEndian() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(38, 9, 128), allocator);) {
+ decimalVector.allocateNew();
+ BigDecimal decimal1 = new BigDecimal("123456789.000000000");
+ BigDecimal decimal2 = new BigDecimal("11.123456789");
+ BigDecimal decimal3 = new BigDecimal("1.000000000");
+ BigDecimal decimal4 = new BigDecimal("0.111111111");
+ BigDecimal decimal5 = new BigDecimal("987654321.123456789");
+ BigDecimal decimal6 = new BigDecimal("222222222222.222222222");
+ BigDecimal decimal7 = new BigDecimal("7777777777777.666666667");
+ BigDecimal decimal8 = new BigDecimal("1212121212.343434343");
+
+ byte[] decimalValue1 = decimal1.unscaledValue().toByteArray();
+ byte[] decimalValue2 = decimal2.unscaledValue().toByteArray();
+ byte[] decimalValue3 = decimal3.unscaledValue().toByteArray();
+ byte[] decimalValue4 = decimal4.unscaledValue().toByteArray();
+ byte[] decimalValue5 = decimal5.unscaledValue().toByteArray();
+ byte[] decimalValue6 = decimal6.unscaledValue().toByteArray();
+ byte[] decimalValue7 = decimal7.unscaledValue().toByteArray();
+ byte[] decimalValue8 = decimal8.unscaledValue().toByteArray();
+
+ decimalVector.setBigEndian(0, decimalValue1);
+ decimalVector.setBigEndian(1, decimalValue2);
+ decimalVector.setBigEndian(2, decimalValue3);
+ decimalVector.setBigEndian(3, decimalValue4);
+ decimalVector.setBigEndian(4, decimalValue5);
+ decimalVector.setBigEndian(5, decimalValue6);
+ decimalVector.setBigEndian(6, decimalValue7);
+ decimalVector.setBigEndian(7, decimalValue8);
+
+ decimalVector.setValueCount(8);
+ assertEquals(8, decimalVector.getValueCount());
+ assertEquals(decimal1, decimalVector.getObject(0));
+ assertEquals(decimal2, decimalVector.getObject(1));
+ assertEquals(decimal3, decimalVector.getObject(2));
+ assertEquals(decimal4, decimalVector.getObject(3));
+ assertEquals(decimal5, decimalVector.getObject(4));
+ assertEquals(decimal6, decimalVector.getObject(5));
+ assertEquals(decimal7, decimalVector.getObject(6));
+ assertEquals(decimal8, decimalVector.getObject(7));
+ }
+ }
+
+ @Test
+ public void testLongReadWrite() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(38, 0, 128), allocator)) {
+ decimalVector.allocateNew();
+
+ long[] longValues = {0L, -2L, Long.MAX_VALUE, Long.MIN_VALUE, 187L};
+
+ for (int i = 0; i < longValues.length; ++i) {
+ decimalVector.set(i, longValues[i]);
+ }
+
+ decimalVector.setValueCount(longValues.length);
+
+ for (int i = 0; i < longValues.length; ++i) {
+ assertEquals(new BigDecimal(longValues[i]), decimalVector.getObject(i));
+ }
+ }
+ }
+
+
+ @Test
+ public void testBigDecimalReadWrite() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(38, 9, 128), allocator);) {
+ decimalVector.allocateNew();
+ BigDecimal decimal1 = new BigDecimal("123456789.000000000");
+ BigDecimal decimal2 = new BigDecimal("11.123456789");
+ BigDecimal decimal3 = new BigDecimal("1.000000000");
+ BigDecimal decimal4 = new BigDecimal("-0.111111111");
+ BigDecimal decimal5 = new BigDecimal("-987654321.123456789");
+ BigDecimal decimal6 = new BigDecimal("-222222222222.222222222");
+ BigDecimal decimal7 = new BigDecimal("7777777777777.666666667");
+ BigDecimal decimal8 = new BigDecimal("1212121212.343434343");
+
+ decimalVector.set(0, decimal1);
+ decimalVector.set(1, decimal2);
+ decimalVector.set(2, decimal3);
+ decimalVector.set(3, decimal4);
+ decimalVector.set(4, decimal5);
+ decimalVector.set(5, decimal6);
+ decimalVector.set(6, decimal7);
+ decimalVector.set(7, decimal8);
+
+ decimalVector.setValueCount(8);
+ assertEquals(8, decimalVector.getValueCount());
+ assertEquals(decimal1, decimalVector.getObject(0));
+ assertEquals(decimal2, decimalVector.getObject(1));
+ assertEquals(decimal3, decimalVector.getObject(2));
+ assertEquals(decimal4, decimalVector.getObject(3));
+ assertEquals(decimal5, decimalVector.getObject(4));
+ assertEquals(decimal6, decimalVector.getObject(5));
+ assertEquals(decimal7, decimalVector.getObject(6));
+ assertEquals(decimal8, decimalVector.getObject(7));
+ }
+ }
+
+ /**
+ * Test {@link DecimalVector#setBigEndian(int, byte[])} which takes BE layout input and stores in native-endian (NE)
+ * layout.
+ * Cases to cover: input byte array in different lengths in range [1-16] and negative values.
+ */
+ @Test
+ public void decimalBE2NE() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(21, 2, 128), allocator)) {
+ decimalVector.allocateNew();
+
+ BigInteger[] testBigInts = new BigInteger[] {
+ new BigInteger("0"),
+ new BigInteger("-1"),
+ new BigInteger("23"),
+ new BigInteger("234234"),
+ new BigInteger("-234234234"),
+ new BigInteger("234234234234"),
+ new BigInteger("-56345345345345"),
+ new BigInteger("29823462983462893462934679234653456345"), // converts to 16 byte array
+ new BigInteger("-3894572983475982374598324598234346536"), // converts to 16 byte array
+ new BigInteger("-345345"),
+ new BigInteger("754533")
+ };
+
+ int insertionIdx = 0;
+ insertionIdx++; // insert a null
+ for (BigInteger val : testBigInts) {
+ decimalVector.setBigEndian(insertionIdx++, val.toByteArray());
+ }
+ insertionIdx++; // insert a null
+ // insert a zero length buffer
+ decimalVector.setBigEndian(insertionIdx++, new byte[0]);
+
+ // Try inserting a buffer larger than 16bytes and expect a failure
+ try {
+ decimalVector.setBigEndian(insertionIdx, new byte[17]);
+ fail("above statement should have failed");
+ } catch (IllegalArgumentException ex) {
+ assertTrue(ex.getMessage().equals("Invalid decimal value length. Valid length in [1 - 16], got 17"));
+ }
+ decimalVector.setValueCount(insertionIdx);
+
+ // retrieve values and check if they are correct
+ int outputIdx = 0;
+ assertTrue(decimalVector.isNull(outputIdx++));
+ for (BigInteger expected : testBigInts) {
+ final BigDecimal actual = decimalVector.getObject(outputIdx++);
+ assertEquals(expected, actual.unscaledValue());
+ }
+ assertTrue(decimalVector.isNull(outputIdx++));
+ assertEquals(BigInteger.valueOf(0), decimalVector.getObject(outputIdx).unscaledValue());
+ }
+ }
+
+ @Test
+ public void setUsingArrowBufOfInts() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(5, 2, 128), allocator);
+ ArrowBuf buf = allocator.buffer(8);) {
+ decimalVector.allocateNew();
+
+ // add a positive value equivalent to 705.32
+ int val = 70532;
+ buf.setInt(0, val);
+ decimalVector.setSafe(0, 0, buf, 4);
+
+ // add a -ve value equivalent to -705.32
+ val = -70532;
+ buf.setInt(4, val);
+ decimalVector.setSafe(1, 4, buf, 4);
+
+ decimalVector.setValueCount(2);
+
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal
+ .valueOf(-705.32)};
+ for (int i = 0; i < 2; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+
+ }
+
+ @Test
+ public void setUsingArrowLongBytes() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(18, 0, 128), allocator);
+ ArrowBuf buf = allocator.buffer(16);) {
+ decimalVector.allocateNew();
+
+ long val = Long.MAX_VALUE;
+ buf.setLong(0, val);
+ decimalVector.setSafe(0, 0, buf, 8);
+
+ val = Long.MIN_VALUE;
+ buf.setLong(8, val);
+ decimalVector.setSafe(1, 8, buf, 8);
+
+ decimalVector.setValueCount(2);
+
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal
+ .valueOf(Long.MIN_VALUE)};
+ for (int i = 0; i < 2; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+ }
+
+ @Test
+ public void setUsingArrowBufOfBEBytes() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(5, 2, 128), allocator);
+ ArrowBuf buf = allocator.buffer(9);) {
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal
+ .valueOf(-705.32), BigDecimal.valueOf(705.32)};
+ verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 3);
+ }
+
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(36, 2, 128), allocator);
+ ArrowBuf buf = allocator.buffer(45);) {
+ BigDecimal[] expectedValues = new BigDecimal[] {new BigDecimal("2982346298346289346293467923465345.63"),
+ new BigDecimal("-2982346298346289346293467923465345.63"),
+ new BigDecimal("2982346298346289346293467923465345.63")};
+ verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 15);
+ }
+ }
+
+ private void verifyWritingArrowBufWithBigEndianBytes(DecimalVector decimalVector,
+ ArrowBuf buf, BigDecimal[] expectedValues,
+ int length) {
+ decimalVector.allocateNew();
+ for (int i = 0; i < expectedValues.length; i++) {
+ byte []bigEndianBytes = expectedValues[i].unscaledValue().toByteArray();
+ buf.setBytes(length * i , bigEndianBytes, 0 , bigEndianBytes.length);
+ decimalVector.setBigEndianSafe(i, length * i, buf, bigEndianBytes.length);
+ }
+
+ decimalVector.setValueCount(3);
+
+ for (int i = 0; i < expectedValues.length; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
new file mode 100644
index 000000000..01becf007
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
@@ -0,0 +1,639 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.VectorWithOrdinal;
+import org.apache.arrow.vector.holders.NullableBigIntHolder;
+import org.apache.arrow.vector.holders.NullableBitHolder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.JsonStringHashMap;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDenseUnionVector {
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testDenseUnionVector() throws Exception {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 100;
+ uInt4Holder.isSet = 1;
+
+ try (DenseUnionVector unionVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ unionVector.allocateNew();
+
+ // write some data
+ byte uint4TypeId = unionVector.registerNewTypeId(Field.nullable("", MinorType.UINT4.getType()));
+ unionVector.setTypeId(0, uint4TypeId);
+ unionVector.setSafe(0, uInt4Holder);
+ unionVector.setTypeId(2, uint4TypeId);
+ unionVector.setSafe(2, uInt4Holder);
+ unionVector.setValueCount(4);
+
+ // check that what we wrote is correct
+ assertEquals(4, unionVector.getValueCount());
+
+ assertEquals(false, unionVector.isNull(0));
+ assertEquals(100, unionVector.getObject(0));
+
+ assertNull(unionVector.getObject(1));
+
+ assertEquals(false, unionVector.isNull(2));
+ assertEquals(100, unionVector.getObject(2));
+
+ assertNull(unionVector.getObject(3));
+ }
+ }
+
+ @Test
+ public void testTransfer() throws Exception {
+ try (DenseUnionVector srcVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ srcVector.allocateNew();
+
+ // write some data
+ byte intTypeId = srcVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ srcVector.setTypeId(0, intTypeId);
+ srcVector.setSafe(0, newIntHolder(5));
+ byte bitTypeId = srcVector.registerNewTypeId(Field.nullable("", MinorType.BIT.getType()));
+ srcVector.setTypeId(1, bitTypeId);
+ srcVector.setSafe(1, newBitHolder(false));
+ srcVector.setTypeId(3, intTypeId);
+ srcVector.setSafe(3, newIntHolder(10));
+ srcVector.setTypeId(5, bitTypeId);
+ srcVector.setSafe(5, newBitHolder(false));
+ srcVector.setValueCount(6);
+
+ try (DenseUnionVector destVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ TransferPair pair = srcVector.makeTransferPair(destVector);
+
+ // Creating the transfer should transfer the type of the field at least.
+ assertEquals(srcVector.getField(), destVector.getField());
+
+ // transfer
+ pair.transfer();
+
+ assertEquals(srcVector.getField(), destVector.getField());
+
+ // now check the values are transferred
+ assertEquals(6, destVector.getValueCount());
+
+ assertFalse(destVector.isNull(0));
+ assertEquals(5, destVector.getObject(0));
+
+ assertFalse(destVector.isNull(1));
+ assertEquals(false, destVector.getObject(1));
+
+ assertNull(destVector.getObject(2));
+
+ assertFalse(destVector.isNull(3));
+ assertEquals(10, destVector.getObject(3));
+
+ assertNull(destVector.getObject(4));
+
+ assertFalse(destVector.isNull(5));
+ assertEquals(false, destVector.getObject(5));
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (DenseUnionVector sourceVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+
+ sourceVector.allocateNew();
+
+ /* populate the UnionVector */
+ byte intTypeId = sourceVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ sourceVector.setTypeId(0, intTypeId);
+ sourceVector.setSafe(0, newIntHolder(5));
+ sourceVector.setTypeId(1, intTypeId);
+ sourceVector.setSafe(1, newIntHolder(10));
+ sourceVector.setTypeId(2, intTypeId);
+ sourceVector.setSafe(2, newIntHolder(15));
+ sourceVector.setTypeId(3, intTypeId);
+ sourceVector.setSafe(3, newIntHolder(20));
+ sourceVector.setTypeId(4, intTypeId);
+ sourceVector.setSafe(4, newIntHolder(25));
+ sourceVector.setTypeId(5, intTypeId);
+ sourceVector.setSafe(5, newIntHolder(30));
+ sourceVector.setTypeId(6, intTypeId);
+ sourceVector.setSafe(6, newIntHolder(35));
+ sourceVector.setTypeId(7, intTypeId);
+ sourceVector.setSafe(7, newIntHolder(40));
+ sourceVector.setTypeId(8, intTypeId);
+ sourceVector.setSafe(8, newIntHolder(45));
+ sourceVector.setTypeId(9, intTypeId);
+ sourceVector.setSafe(9, newIntHolder(50));
+ sourceVector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, sourceVector.getValueCount());
+ assertEquals(false, sourceVector.isNull(0));
+ assertEquals(5, sourceVector.getObject(0));
+ assertEquals(false, sourceVector.isNull(1));
+ assertEquals(10, sourceVector.getObject(1));
+ assertEquals(false, sourceVector.isNull(2));
+ assertEquals(15, sourceVector.getObject(2));
+ assertEquals(false, sourceVector.isNull(3));
+ assertEquals(20, sourceVector.getObject(3));
+ assertEquals(false, sourceVector.isNull(4));
+ assertEquals(25, sourceVector.getObject(4));
+ assertEquals(false, sourceVector.isNull(5));
+ assertEquals(30, sourceVector.getObject(5));
+ assertEquals(false, sourceVector.isNull(6));
+ assertEquals(35, sourceVector.getObject(6));
+ assertEquals(false, sourceVector.isNull(7));
+ assertEquals(40, sourceVector.getObject(7));
+ assertEquals(false, sourceVector.isNull(8));
+ assertEquals(45, sourceVector.getObject(8));
+ assertEquals(false, sourceVector.isNull(9));
+ assertEquals(50, sourceVector.getObject(9));
+
+ try (DenseUnionVector toVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ toVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 3},
+ {3, 1},
+ {4, 2},
+ {6, 1},
+ {7, 1},
+ {8, 2}
+ };
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing the splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i),
+ toVector.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferWithMixedVectors() throws Exception {
+ try (DenseUnionVector sourceVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+
+ sourceVector.allocateNew();
+
+ /* populate the UnionVector */
+ byte intTypeId = sourceVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+
+ sourceVector.setTypeId(0, intTypeId);
+ sourceVector.setSafe(0, newIntHolder(5));
+
+ byte float4TypeId = sourceVector.registerNewTypeId(Field.nullable("", MinorType.FLOAT4.getType()));
+
+ sourceVector.setTypeId(1, float4TypeId);
+ sourceVector.setSafe(1, newFloat4Holder(5.5f));
+
+ sourceVector.setTypeId(2, intTypeId);
+ sourceVector.setSafe(2, newIntHolder(10));
+
+ sourceVector.setTypeId(3, float4TypeId);
+ sourceVector.setSafe(3, newFloat4Holder(10.5f));
+
+ sourceVector.setTypeId(4, intTypeId);
+ sourceVector.setSafe(4, newIntHolder(15));
+
+ sourceVector.setTypeId(5, float4TypeId);
+ sourceVector.setSafe(5, newFloat4Holder(15.5f));
+
+ sourceVector.setTypeId(6, intTypeId);
+ sourceVector.setSafe(6, newIntHolder(20));
+
+ sourceVector.setTypeId(7, float4TypeId);
+ sourceVector.setSafe(7, newFloat4Holder(20.5f));
+
+ sourceVector.setTypeId(8, intTypeId);
+ sourceVector.setSafe(8, newIntHolder(30));
+
+ sourceVector.setTypeId(9, float4TypeId);
+ sourceVector.setSafe(9, newFloat4Holder(30.5f));
+ sourceVector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, sourceVector.getValueCount());
+ assertEquals(false, sourceVector.isNull(0));
+ assertEquals(5, sourceVector.getObject(0));
+ assertEquals(false, sourceVector.isNull(1));
+ assertEquals(5.5f, sourceVector.getObject(1));
+ assertEquals(false, sourceVector.isNull(2));
+ assertEquals(10, sourceVector.getObject(2));
+ assertEquals(false, sourceVector.isNull(3));
+ assertEquals(10.5f, sourceVector.getObject(3));
+ assertEquals(false, sourceVector.isNull(4));
+ assertEquals(15, sourceVector.getObject(4));
+ assertEquals(false, sourceVector.isNull(5));
+ assertEquals(15.5f, sourceVector.getObject(5));
+ assertEquals(false, sourceVector.isNull(6));
+ assertEquals(20, sourceVector.getObject(6));
+ assertEquals(false, sourceVector.isNull(7));
+ assertEquals(20.5f, sourceVector.getObject(7));
+ assertEquals(false, sourceVector.isNull(8));
+ assertEquals(30, sourceVector.getObject(8));
+ assertEquals(false, sourceVector.isNull(9));
+ assertEquals(30.5f, sourceVector.getObject(9));
+
+ try (DenseUnionVector toVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ toVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ toVector.registerNewTypeId(Field.nullable("", MinorType.FLOAT4.getType()));
+
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 2},
+ {2, 1},
+ {3, 2},
+ {5, 3},
+ {8, 2}
+ };
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing the splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testGetFieldTypeInfo() throws Exception {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+
+ int[] typeIds = new int[2];
+ typeIds[0] = 0;
+ typeIds[1] = 1;
+
+ List<Field> children = new ArrayList<>();
+ children.add(new Field("int", FieldType.nullable(MinorType.INT.getType()), null));
+ children.add(new Field("varchar", FieldType.nullable(MinorType.VARCHAR.getType()), null));
+
+ final FieldType fieldType = new FieldType(false, new ArrowType.Union(UnionMode.Dense, typeIds),
+ /*dictionary=*/null, metadata);
+ final Field field = new Field("union", fieldType, children);
+
+ MinorType minorType = MinorType.DENSEUNION;
+ DenseUnionVector vector = (DenseUnionVector) minorType.getNewVector(field, allocator, null);
+ vector.initializeChildrenFromFields(children);
+
+ assertEquals(vector.getField(), field);
+
+ // Union has 2 child vectors
+ assertEquals(vector.size(), 2);
+
+ // Check child field 0
+ VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
+ assertEquals(intChild.ordinal, 0);
+ assertEquals(intChild.vector.getField(), children.get(0));
+
+ // Check child field 1
+ VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
+ assertEquals(varcharChild.ordinal, 1);
+ assertEquals(varcharChild.vector.getField(), children.get(1));
+ }
+
+ @Test
+ public void testGetBufferAddress() throws Exception {
+ try (DenseUnionVector vector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ boolean error = false;
+
+ vector.allocateNew();
+
+ /* populate the UnionVector */
+ byte intTypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ vector.setTypeId(0, intTypeId);
+ vector.setSafe(0, newIntHolder(5));
+
+ byte float4TypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ vector.setTypeId(1, float4TypeId);
+ vector.setSafe(1, newFloat4Holder(5.5f));
+
+ vector.setTypeId(2, intTypeId);
+ vector.setSafe(2, newIntHolder(10));
+
+ vector.setTypeId(3, float4TypeId);
+ vector.setSafe(3, newFloat4Holder(10.5f));
+
+ vector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, vector.getValueCount());
+ assertEquals(false, vector.isNull(0));
+ assertEquals(5, vector.getObject(0));
+ assertEquals(false, vector.isNull(1));
+ assertEquals(5.5f, vector.getObject(1));
+ assertEquals(false, vector.isNull(2));
+ assertEquals(10, vector.getObject(2));
+ assertEquals(false, vector.isNull(3));
+ assertEquals(10.5f, vector.getObject(3));
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+
+ long offsetAddress = vector.getOffsetBufferAddress();
+
+ try {
+ vector.getDataBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(2, buffers.size());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+ }
+ }
+
+ /**
+ * Test adding two struct vectors to the dense union vector.
+ */
+ @Test
+ public void testMultipleStructs() {
+ FieldType type = new FieldType(true, ArrowType.Struct.INSTANCE, null, null);
+ try (StructVector structVector1 = new StructVector("struct1", allocator, type, null);
+ StructVector structVector2 = new StructVector("struct2", allocator, type, null);
+ DenseUnionVector unionVector = DenseUnionVector.empty("union", allocator)) {
+
+ // prepare sub vectors
+
+ // first struct vector: (int, int)
+ IntVector subVector11 = structVector1
+ .addOrGet("sub11", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+ subVector11.allocateNew();
+ ValueVectorDataPopulator.setVector(subVector11, 0, 1);
+
+ IntVector subVector12 = structVector1
+ .addOrGet("sub12", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+ subVector12.allocateNew();
+ ValueVectorDataPopulator.setVector(subVector12, 0, 10);
+
+ structVector1.setIndexDefined(0);
+ structVector1.setIndexDefined(1);
+ structVector1.setValueCount(2);
+
+ // second struct vector: (string, string)
+ VarCharVector subVector21 = structVector2
+ .addOrGet("sub21", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class);
+ subVector21.allocateNew();
+ ValueVectorDataPopulator.setVector(subVector21, "a0");
+
+ VarCharVector subVector22 = structVector2
+ .addOrGet("sub22", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class);
+ subVector22.allocateNew();
+ ValueVectorDataPopulator.setVector(subVector22, "b0");
+
+ structVector2.setIndexDefined(0);
+ structVector2.setValueCount(1);
+
+ // register relative types
+ byte typeId1 = unionVector.registerNewTypeId(structVector1.getField());
+ byte typeId2 = unionVector.registerNewTypeId(structVector2.getField());
+ assertEquals(typeId1, 0);
+ assertEquals(typeId2, 1);
+
+ // add two struct vectors to union vector
+ unionVector.addVector(typeId1, structVector1);
+ unionVector.addVector(typeId2, structVector2);
+
+ while (unionVector.getValueCapacity() < 3) {
+ unionVector.reAlloc();
+ }
+
+ ArrowBuf offsetBuf = unionVector.getOffsetBuffer();
+
+ unionVector.setTypeId(0, typeId1);
+ offsetBuf.setInt(0, 0);
+
+ unionVector.setTypeId(1, typeId2);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH, 0);
+
+ unionVector.setTypeId(2, typeId1);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH * 2, 1);
+
+ unionVector.setValueCount(3);
+
+ Map<String, Integer> value0 = new JsonStringHashMap<>();
+ value0.put("sub11", 0);
+ value0.put("sub12", 0);
+
+ assertEquals(value0, unionVector.getObject(0));
+
+ Map<String, Text> value1 = new JsonStringHashMap<>();
+ value1.put("sub21", new Text("a0"));
+ value1.put("sub22", new Text("b0"));
+
+ assertEquals(value1, unionVector.getObject(1));
+
+ Map<String, Integer> value2 = new JsonStringHashMap<>();
+ value2.put("sub11", 1);
+ value2.put("sub12", 10);
+
+ assertEquals(value2, unionVector.getObject(2));
+ }
+ }
+
+ /**
+ * Test adding two varchar vectors to the dense union vector.
+ */
+ @Test
+ public void testMultipleVarChars() {
+ try (VarCharVector childVector1 = new VarCharVector("child1", allocator);
+ VarCharVector childVector2 = new VarCharVector("child2", allocator);
+ DenseUnionVector unionVector = DenseUnionVector.empty("union", allocator)) {
+
+ // prepare sub vectors
+ ValueVectorDataPopulator.setVector(childVector1, "a0", "a4");
+ ValueVectorDataPopulator.setVector(childVector2, "b1", "b2");
+
+ // register relative types
+ byte typeId1 = unionVector.registerNewTypeId(childVector1.getField());
+ byte typeId2 = unionVector.registerNewTypeId(childVector2.getField());
+
+ assertEquals(typeId1, 0);
+ assertEquals(typeId2, 1);
+
+ while (unionVector.getValueCapacity() < 5) {
+ unionVector.reAlloc();
+ }
+
+ // add two struct vectors to union vector
+ unionVector.addVector(typeId1, childVector1);
+ unionVector.addVector(typeId2, childVector2);
+
+ ArrowBuf offsetBuf = unionVector.getOffsetBuffer();
+
+ // slot 0 points to child1
+ unionVector.setTypeId(0, typeId1);
+ offsetBuf.setInt(0, 0);
+
+ // slot 1 points to child2
+ unionVector.setTypeId(1, typeId2);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH, 0);
+
+ // slot 2 points to child2
+ unionVector.setTypeId(2, typeId2);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH * 2, 1);
+
+
+ // slot 4 points to child1
+ unionVector.setTypeId(4, typeId1);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH * 4, 1);
+
+ unionVector.setValueCount(5);
+
+ assertEquals(new Text("a0"), unionVector.getObject(0));
+ assertEquals(new Text("b1"), unionVector.getObject(1));
+ assertEquals(new Text("b2"), unionVector.getObject(2));
+ assertNull(unionVector.getObject(3));
+ assertEquals(new Text("a4"), unionVector.getObject(4));
+ }
+ }
+
+ @Test
+ public void testChildVectorValueCounts() {
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+
+ final NullableBigIntHolder longHolder = new NullableBigIntHolder();
+ longHolder.isSet = 1;
+
+ final NullableFloat4Holder floatHolder = new NullableFloat4Holder();
+ floatHolder.isSet = 1;
+
+ try (DenseUnionVector vector = new DenseUnionVector("vector", allocator, null, null)) {
+ vector.allocateNew();
+
+ // populate the delta vector with values {7, null, 8L, 9.0f, 10, 12L}
+ while (vector.getValueCapacity() < 6) {
+ vector.reAlloc();
+ }
+ byte intTypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ vector.setTypeId(0, intTypeId);
+ intHolder.value = 7;
+ vector.setSafe(0, intHolder);
+ byte longTypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ vector.setTypeId(2, longTypeId);
+ longHolder.value = 8L;
+ vector.setSafe(2, longHolder);
+ byte floatTypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ vector.setTypeId(3, floatTypeId);
+ floatHolder.value = 9.0f;
+ vector.setSafe(3, floatHolder);
+
+ vector.setTypeId(4, intTypeId);
+ intHolder.value = 10;
+ vector.setSafe(4, intHolder);
+ vector.setTypeId(5, longTypeId);
+ longHolder.value = 12L;
+ vector.setSafe(5, longHolder);
+
+ vector.setValueCount(6);
+
+ // verify results
+ IntVector intVector = (IntVector) vector.getVectorByType(intTypeId);
+ assertEquals(2, intVector.getValueCount());
+ assertEquals(7, intVector.get(0));
+ assertEquals(10, intVector.get(1));
+
+ BigIntVector longVector = (BigIntVector) vector.getVectorByType(longTypeId);
+ assertEquals(2, longVector.getValueCount());
+ assertEquals(8L, longVector.get(0));
+ assertEquals(12L, longVector.get(1));
+
+ Float4Vector floagVector = (Float4Vector) vector.getVectorByType(floatTypeId);
+ assertEquals(1, floagVector.getValueCount());
+ assertEquals(9.0f, floagVector.get(0), 0);
+ }
+ }
+
+ private static NullableIntHolder newIntHolder(int value) {
+ final NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = value;
+ return holder;
+ }
+
+ private static NullableBitHolder newBitHolder(boolean value) {
+ final NullableBitHolder holder = new NullableBitHolder();
+ holder.isSet = 1;
+ holder.value = value ? 1 : 0;
+ return holder;
+ }
+
+ private static NullableFloat4Holder newFloat4Holder(float value) {
+ final NullableFloat4Holder holder = new NullableFloat4Holder();
+ holder.isSet = 1;
+ holder.value = value;
+ return holder;
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
new file mode 100644
index 000000000..bc6cddf36
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
@@ -0,0 +1,1032 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.TestUtils.newVarBinaryVector;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.function.ToIntBiFunction;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.dictionary.ListSubfieldEncoder;
+import org.apache.arrow.vector.dictionary.StructSubfieldEncoder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.Text;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDictionaryVector {
+
+ private BufferAllocator allocator;
+
+ byte[] zero = "foo".getBytes(StandardCharsets.UTF_8);
+ byte[] one = "bar".getBytes(StandardCharsets.UTF_8);
+ byte[] two = "baz".getBytes(StandardCharsets.UTF_8);
+
+ byte[][] data = new byte[][] {zero, one, two};
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testEncodeStrings() {
+ // Create a new value vector
+ try (final VarCharVector vector = newVarCharVector("foo", allocator);
+ final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) {
+
+ setVector(vector, zero, one, one, two, zero);
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary =
+ new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(0, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), ((VarCharVector) decoded).getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), ((VarCharVector) decoded).getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeLargeVector() {
+ // Create a new value vector
+ try (final VarCharVector vector = newVarCharVector("foo", allocator);
+ final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) {
+ vector.allocateNew();
+
+ int count = 10000;
+
+ for (int i = 0; i < 10000; ++i) {
+ vector.setSafe(i, data[i % 3], 0, data[i % 3].length);
+ }
+ vector.setValueCount(count);
+
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary =
+ new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(count, index.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(i % 3, index.get(i));
+ }
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeList() {
+ // Create a new value vector
+ try (final ListVector vector = ListVector.empty("vector", allocator);
+ final ListVector dictionaryVector = ListVector.empty("dict", allocator);) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{30, 40, 50});
+ writeListVector(writer, new int[]{30, 40, 50});
+ writeListVector(writer, new int[]{10, 20});
+
+ writer.setValueCount(6);
+
+ UnionListWriter dictWriter = dictionaryVector.getWriter();
+ dictWriter.allocate();
+
+ writeListVector(dictWriter, new int[]{10, 20});
+ writeListVector(dictWriter, new int[]{30, 40, 50});
+
+ dictWriter.setValueCount(2);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(6, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(0, index.get(1));
+ assertEquals(0, index.get(2));
+ assertEquals(1, index.get(3));
+ assertEquals(1, index.get(4));
+ assertEquals(0, index.get(5));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeStruct() {
+ // Create a new value vector
+ try (final StructVector vector = StructVector.empty("vector", allocator);
+ final StructVector dictionaryVector = StructVector.empty("dict", allocator);) {
+ vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ dictionaryVector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ dictionaryVector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer = vector.getWriter();
+ writer.allocate();
+
+ writeStructVector(writer, 1, 10L);
+ writeStructVector(writer, 1, 10L);
+ writeStructVector(writer, 1, 10L);
+ writeStructVector(writer, 2, 20L);
+ writeStructVector(writer, 2, 20L);
+ writeStructVector(writer, 2, 20L);
+ writeStructVector(writer, 1, 10L);
+
+ writer.setValueCount(7);
+
+ NullableStructWriter dictWriter = dictionaryVector.getWriter();
+ dictWriter.allocate();
+
+ writeStructVector(dictWriter, 1, 10L);
+ writeStructVector(dictWriter, 2, 20L);
+
+
+ dictionaryVector.setValueCount(2);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(7, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(0, index.get(1));
+ assertEquals(0, index.get(2));
+ assertEquals(1, index.get(3));
+ assertEquals(1, index.get(4));
+ assertEquals(1, index.get(5));
+ assertEquals(0, index.get(6));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeBinaryVector() {
+ // Create a new value vector
+ try (final VarBinaryVector vector = newVarBinaryVector("foo", allocator);
+ final VarBinaryVector dictionaryVector = newVarBinaryVector("dict", allocator)) {
+
+ setVector(vector, zero, one, one, two, zero);
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(0, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i)));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeUnion() {
+ // Create a new value vector
+ try (final UnionVector vector = new UnionVector("vector", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector dictionaryVector =
+ new UnionVector("dict", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableUInt4Holder uintHolder1 = new NullableUInt4Holder();
+ uintHolder1.value = 10;
+ uintHolder1.isSet = 1;
+
+ final NullableIntHolder intHolder1 = new NullableIntHolder();
+ intHolder1.value = 10;
+ intHolder1.isSet = 1;
+
+ final NullableIntHolder intHolder2 = new NullableIntHolder();
+ intHolder2.value = 20;
+ intHolder2.isSet = 1;
+
+ //write data
+ vector.setType(0, Types.MinorType.UINT4);
+ vector.setSafe(0, uintHolder1);
+
+ vector.setType(1, Types.MinorType.INT);
+ vector.setSafe(1, intHolder1);
+
+ vector.setType(2, Types.MinorType.INT);
+ vector.setSafe(2, intHolder1);
+
+ vector.setType(3, Types.MinorType.INT);
+ vector.setSafe(3, intHolder2);
+
+ vector.setType(4, Types.MinorType.INT);
+ vector.setSafe(4, intHolder2);
+
+ vector.setValueCount(5);
+
+ //write dictionary
+ dictionaryVector.setType(0, Types.MinorType.UINT4);
+ dictionaryVector.setSafe(0, uintHolder1);
+
+ dictionaryVector.setType(1, Types.MinorType.INT);
+ dictionaryVector.setSafe(1, intHolder1);
+
+ dictionaryVector.setType(2, Types.MinorType.INT);
+ dictionaryVector.setSafe(2, intHolder2);
+
+ dictionaryVector.setValueCount(3);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(2, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testIntEquals() {
+ //test Int
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ setVector(vector1, 1, 2, 3);
+ setVector(vector2, 1, 2, 0);
+
+ assertFalse(dict1.equals(dict2));
+
+ vector2.setSafe(2, 3);
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testVarcharEquals() {
+ try (final VarCharVector vector1 = new VarCharVector("varchar", allocator);
+ final VarCharVector vector2 = new VarCharVector("varchar", allocator)) {
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ setVector(vector1, zero, one, two);
+ setVector(vector2, zero, one, one);
+
+ assertFalse(dict1.equals(dict2));
+
+ vector2.setSafe(2, two, 0, two.length);
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testVarBinaryEquals() {
+ try (final VarBinaryVector vector1 = new VarBinaryVector("binary", allocator);
+ final VarBinaryVector vector2 = new VarBinaryVector("binary", allocator)) {
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ setVector(vector1, zero, one, two);
+ setVector(vector2, zero, one, one);
+
+ assertFalse(dict1.equals(dict2));
+
+ vector2.setSafe(2, two, 0, two.length);
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testListEquals() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ UnionListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeListVector(writer1, new int[] {1, 2});
+ writeListVector(writer1, new int[] {3, 4});
+ writeListVector(writer1, new int[] {5, 6});
+ writer1.setValueCount(3);
+
+ UnionListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeListVector(writer2, new int[] {1, 2});
+ writeListVector(writer2, new int[] {3, 4});
+ writeListVector(writer2, new int[] {5, 6});
+ writer2.setValueCount(3);
+
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testStructEquals() {
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writer1.setValueCount(2);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 1, 10L);
+ writeStructVector(writer2, 2, 20L);
+ writer2.setValueCount(2);
+
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testUnionEquals() {
+ try (final UnionVector vector1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector vector2 =
+ new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 10;
+ uInt4Holder.isSet = 1;
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ uInt4Holder.value = 20;
+ uInt4Holder.isSet = 1;
+
+ vector1.setType(0, Types.MinorType.UINT4);
+ vector1.setSafe(0, uInt4Holder);
+
+ vector1.setType(2, Types.MinorType.INT);
+ vector1.setSafe(2, intHolder);
+ vector1.setValueCount(3);
+
+ vector2.setType(0, Types.MinorType.UINT4);
+ vector2.setSafe(0, uInt4Holder);
+
+ vector2.setType(2, Types.MinorType.INT);
+ vector2.setSafe(2, intHolder);
+ vector2.setValueCount(3);
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testEncodeWithEncoderInstance() {
+ // Create a new value vector
+ try (final VarCharVector vector = newVarCharVector("vector", allocator);
+ final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) {
+
+ setVector(vector, zero, one, one, two, zero);
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary =
+ new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ DictionaryEncoder encoder = new DictionaryEncoder(dictionary, allocator);
+
+ try (final ValueVector encoded = encoder.encode(vector)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(0, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), (decoded).getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), ((VarCharVector) decoded).getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeMultiVectors() {
+ // Create a new value vector
+ try (final VarCharVector vector1 = newVarCharVector("vector1", allocator);
+ final VarCharVector vector2 = newVarCharVector("vector2", allocator);
+ final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) {
+
+ setVector(vector1, zero, one, one, two, zero);
+ setVector(vector2, zero, one, one);
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary =
+ new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ DictionaryEncoder encoder = new DictionaryEncoder(dictionary, allocator);
+
+ try (final ValueVector encoded = encoder.encode(vector1)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(0, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector1.getClass(), decoded.getClass());
+ assertEquals(vector1.getValueCount(), (decoded).getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector1.getObject(i), ((VarCharVector) decoded).getObject(i));
+ }
+ }
+ }
+
+ try (final ValueVector encoded = encoder.encode(vector2)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(3, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector2.getClass(), decoded.getClass());
+ assertEquals(vector2.getValueCount(), (decoded).getValueCount());
+ for (int i = 0; i < 3; i++) {
+ assertEquals(vector2.getObject(i), ((VarCharVector) decoded).getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeListSubField() {
+ // Create a new value vector
+ try (final ListVector vector = ListVector.empty("vector", allocator);
+ final ListVector dictionaryVector = ListVector.empty("dict", allocator);) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{30, 40, 50});
+ writeListVector(writer, new int[]{30, 40, 50});
+ writeListVector(writer, new int[]{10, 20});
+ writer.setValueCount(6);
+
+ UnionListWriter dictWriter = dictionaryVector.getWriter();
+ dictWriter.allocate();
+ writeListVector(dictWriter, new int[]{10, 20, 30, 40, 50});
+ dictionaryVector.setValueCount(1);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ ListSubfieldEncoder encoder = new ListSubfieldEncoder(dictionary, allocator);
+
+ try (final ListVector encoded = (ListVector) encoder.encodeListSubField(vector)) {
+ // verify indices
+ assertEquals(ListVector.class, encoded.getClass());
+
+ assertEquals(6, encoded.getValueCount());
+ int[] realValue1 = convertListToIntArray(encoded.getObject(0));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue1));
+ int[] realValue2 = convertListToIntArray(encoded.getObject(1));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue2));
+ int[] realValue3 = convertListToIntArray(encoded.getObject(2));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue3));
+ int[] realValue4 = convertListToIntArray(encoded.getObject(3));
+ assertTrue(Arrays.equals(new int[] {2, 3, 4}, realValue4));
+ int[] realValue5 = convertListToIntArray(encoded.getObject(4));
+ assertTrue(Arrays.equals(new int[] {2, 3, 4}, realValue5));
+ int[] realValue6 = convertListToIntArray(encoded.getObject(5));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue6));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decodeListSubField(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeFixedSizeListSubField() {
+ // Create a new value vector
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", 2, allocator);
+ final FixedSizeListVector dictionaryVector = FixedSizeListVector.empty("dict", 2, allocator)) {
+
+ vector.allocateNew();
+ vector.setValueCount(4);
+
+ IntVector dataVector =
+ (IntVector) vector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())).getVector();
+ dataVector.allocateNew(8);
+ dataVector.setValueCount(8);
+ // set value at index 0
+ vector.setNotNull(0);
+ dataVector.set(0, 10);
+ dataVector.set(1, 20);
+ // set value at index 1
+ vector.setNotNull(1);
+ dataVector.set(2, 10);
+ dataVector.set(3, 20);
+ // set value at index 2
+ vector.setNotNull(2);
+ dataVector.set(4, 30);
+ dataVector.set(5, 40);
+ // set value at index 3
+ vector.setNotNull(3);
+ dataVector.set(6, 10);
+ dataVector.set(7, 20);
+
+ dictionaryVector.allocateNew();
+ dictionaryVector.setValueCount(2);
+ IntVector dictDataVector =
+ (IntVector) dictionaryVector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())).getVector();
+ dictDataVector.allocateNew(4);
+ dictDataVector.setValueCount(4);
+
+ dictionaryVector.setNotNull(0);
+ dictDataVector.set(0, 10);
+ dictDataVector.set(1, 20);
+ dictionaryVector.setNotNull(1);
+ dictDataVector.set(2, 30);
+ dictDataVector.set(3, 40);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ ListSubfieldEncoder encoder = new ListSubfieldEncoder(dictionary, allocator);
+
+ try (final FixedSizeListVector encoded =
+ (FixedSizeListVector) encoder.encodeListSubField(vector)) {
+ // verify indices
+ assertEquals(FixedSizeListVector.class, encoded.getClass());
+
+ assertEquals(4, encoded.getValueCount());
+ int[] realValue1 = convertListToIntArray(encoded.getObject(0));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue1));
+ int[] realValue2 = convertListToIntArray(encoded.getObject(1));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue2));
+ int[] realValue3 = convertListToIntArray(encoded.getObject(2));
+ assertTrue(Arrays.equals(new int[] {2, 3}, realValue3));
+ int[] realValue4 = convertListToIntArray(encoded.getObject(3));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decodeListSubField(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeStructSubField() {
+ try (final StructVector vector = StructVector.empty("vector", allocator);
+ final VarCharVector dictVector1 = new VarCharVector("f0", allocator);
+ final VarCharVector dictVector2 = new VarCharVector("f1", allocator)) {
+
+ vector.addOrGet("f0", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class);
+ vector.addOrGet("f1", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class);
+
+ NullableStructWriter writer = vector.getWriter();
+ writer.allocate();
+ //set some values
+ writeStructVector(writer, "aa", "baz");
+ writeStructVector(writer, "bb", "bar");
+ writeStructVector(writer, "cc", "foo");
+ writeStructVector(writer, "aa", "foo");
+ writeStructVector(writer, "dd", "foo");
+ writer.setValueCount(5);
+
+ // initialize dictionaries
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+
+
+ setVector(dictVector1,
+ "aa".getBytes(StandardCharsets.UTF_8),
+ "bb".getBytes(StandardCharsets.UTF_8),
+ "cc".getBytes(StandardCharsets.UTF_8),
+ "dd".getBytes(StandardCharsets.UTF_8));
+ setVector(dictVector2,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8));
+
+ provider.put(new Dictionary(dictVector1, new DictionaryEncoding(1L, false, null)));
+ provider.put(new Dictionary(dictVector2, new DictionaryEncoding(2L, false, null)));
+
+ StructSubfieldEncoder encoder = new StructSubfieldEncoder(allocator, provider);
+ Map<Integer, Long> columnToDictionaryId = new HashMap<>();
+ columnToDictionaryId.put(0, 1L);
+ columnToDictionaryId.put(1, 2L);
+
+ try (final StructVector encoded = (StructVector) encoder.encode(vector, columnToDictionaryId)) {
+ // verify indices
+ assertEquals(StructVector.class, encoded.getClass());
+
+ assertEquals(5, encoded.getValueCount());
+ Object[] realValue1 = convertMapValuesToArray(encoded.getObject(0));
+ assertTrue(Arrays.equals(new Object[] {0, 1}, realValue1));
+ Object[] realValue2 = convertMapValuesToArray(encoded.getObject(1));
+ assertTrue(Arrays.equals(new Object[] {1, 2}, realValue2));
+ Object[] realValue3 = convertMapValuesToArray(encoded.getObject(2));
+ assertTrue(Arrays.equals(new Object[] {2, 0}, realValue3));
+ Object[] realValue4 = convertMapValuesToArray(encoded.getObject(3));
+ assertTrue(Arrays.equals(new Object[] {0, 0}, realValue4));
+ Object[] realValue5 = convertMapValuesToArray(encoded.getObject(4));
+ assertTrue(Arrays.equals(new Object[] {3, 0}, realValue5));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeStructSubFieldWithCertainColumns() {
+ // in this case, some child vector is encoded and others are not
+ try (final StructVector vector = StructVector.empty("vector", allocator);
+ final VarCharVector dictVector1 = new VarCharVector("f0", allocator)) {
+
+ vector.addOrGet("f0", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class);
+ vector.addOrGet("f1", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class);
+
+ NullableStructWriter writer = vector.getWriter();
+ writer.allocate();
+ //set some values
+ writeStructVector(writer, "aa", "baz");
+ writeStructVector(writer, "bb", "bar");
+ writeStructVector(writer, "cc", "foo");
+ writeStructVector(writer, "aa", "foo");
+ writeStructVector(writer, "dd", "foo");
+ writer.setValueCount(5);
+
+ // initialize dictionaries
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+
+ setVector(dictVector1, "aa".getBytes(), "bb".getBytes(), "cc".getBytes(), "dd".getBytes());
+
+ provider.put(new Dictionary(dictVector1, new DictionaryEncoding(1L, false, null)));
+ StructSubfieldEncoder encoder = new StructSubfieldEncoder(allocator, provider);
+ Map<Integer, Long> columnToDictionaryId = new HashMap<>();
+ columnToDictionaryId.put(0, 1L);
+
+ try (final StructVector encoded = (StructVector) encoder.encode(vector, columnToDictionaryId)) {
+ // verify indices
+ assertEquals(StructVector.class, encoded.getClass());
+
+ assertEquals(5, encoded.getValueCount());
+ Object[] realValue1 = convertMapValuesToArray(encoded.getObject(0));
+ assertTrue(Arrays.equals(new Object[] {0, new Text("baz")}, realValue1));
+ Object[] realValue2 = convertMapValuesToArray(encoded.getObject(1));
+ assertTrue(Arrays.equals(new Object[] {1, new Text("bar")}, realValue2));
+ Object[] realValue3 = convertMapValuesToArray(encoded.getObject(2));
+ assertTrue(Arrays.equals(new Object[] {2, new Text("foo")}, realValue3));
+ Object[] realValue4 = convertMapValuesToArray(encoded.getObject(3));
+ assertTrue(Arrays.equals(new Object[] {0, new Text("foo")}, realValue4));
+ Object[] realValue5 = convertMapValuesToArray(encoded.getObject(4));
+ assertTrue(Arrays.equals(new Object[] {3, new Text("foo")}, realValue5));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+
+ }
+ }
+
+ private void testDictionary(Dictionary dictionary, ToIntBiFunction<ValueVector, Integer> valGetter) {
+ try (VarCharVector vector = new VarCharVector("vector", allocator)) {
+ setVector(vector, "1", "3", "5", "7", "9");
+ try (ValueVector encodedVector = DictionaryEncoder.encode(vector, dictionary)) {
+
+ // verify encoded result
+ assertEquals(vector.getValueCount(), encodedVector.getValueCount());
+ assertEquals(valGetter.applyAsInt(encodedVector, 0), 1);
+ assertEquals(valGetter.applyAsInt(encodedVector, 1), 3);
+ assertEquals(valGetter.applyAsInt(encodedVector, 2), 5);
+ assertEquals(valGetter.applyAsInt(encodedVector, 3), 7);
+ assertEquals(valGetter.applyAsInt(encodedVector, 4), 9);
+
+ try (ValueVector decodedVector = DictionaryEncoder.decode(encodedVector, dictionary)) {
+ assertTrue(decodedVector instanceof VarCharVector);
+ assertEquals(vector.getValueCount(), decodedVector.getValueCount());
+ assertArrayEquals("1".getBytes(), ((VarCharVector) decodedVector).get(0));
+ assertArrayEquals("3".getBytes(), ((VarCharVector) decodedVector).get(1));
+ assertArrayEquals("5".getBytes(), ((VarCharVector) decodedVector).get(2));
+ assertArrayEquals("7".getBytes(), ((VarCharVector) decodedVector).get(3));
+ assertArrayEquals("9".getBytes(), ((VarCharVector) decodedVector).get(4));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testDictionaryUInt1() {
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
+ Dictionary dictionary1 = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/10L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*bitWidth*/8, /*isSigned*/false)));
+ testDictionary(dictionary1, (vector, index) -> ((UInt1Vector) vector).get(index));
+ }
+ }
+
+ @Test
+ public void testDictionaryUInt2() {
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
+ Dictionary dictionary2 = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/20L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*indexType=*/16, /*isSigned*/false)));
+ testDictionary(dictionary2, (vector, index) -> ((UInt2Vector) vector).get(index));
+ }
+ }
+
+ @Test
+ public void testDictionaryUInt4() {
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
+ Dictionary dictionary4 = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/30L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*indexType=*/32, /*isSigned*/false)));
+ testDictionary(dictionary4, (vector, index) -> ((UInt4Vector) vector).get(index));
+ }
+ }
+
+ @Test
+ public void testDictionaryUInt8() {
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
+ Dictionary dictionary8 = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/40L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*indexType=*/64, /*isSigned*/false)));
+ testDictionary(dictionary8, (vector, index) -> (int) ((UInt8Vector) vector).get(index));
+ }
+ }
+
+ @Test
+ public void testDictionaryUIntOverflow() {
+ // the size is within the range of UInt1, but outside the range of TinyInt.
+ final int vecLength = 256;
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ dictionaryVector.allocateNew(vecLength * 3, vecLength);
+ for (int i = 0; i < vecLength; i++) {
+ dictionaryVector.set(i, String.valueOf(i).getBytes());
+ }
+ dictionaryVector.setValueCount(vecLength);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/10L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*indexType=*/8, /*isSigned*/false)));
+
+ try (VarCharVector vector = new VarCharVector("vector", allocator)) {
+ setVector(vector, "255");
+ try (UInt1Vector encodedVector = (UInt1Vector) DictionaryEncoder.encode(vector, dictionary)) {
+
+ // verify encoded result
+ assertEquals(1, encodedVector.getValueCount());
+ assertEquals(255, encodedVector.getValueAsLong(0));
+
+ try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dictionary)) {
+ assertEquals(1, decodedVector.getValueCount());
+ assertArrayEquals("255".getBytes(), decodedVector.get(0));
+ }
+ }
+ }
+ }
+ }
+
+ private int[] convertListToIntArray(List list) {
+ int[] values = new int[list.size()];
+ for (int i = 0; i < list.size(); i++) {
+ values[i] = (int) list.get(i);
+ }
+ return values;
+ }
+
+ private Object[] convertMapValuesToArray(Map map) {
+ Object[] values = new Object[map.size()];
+ Iterator valueIterator = map.values().iterator();
+ for (int i = 0; i < map.size(); i++) {
+ values[i] = valueIterator.next();
+ }
+ return values;
+ }
+
+ private void writeStructVector(NullableStructWriter writer, String value1, String value2) {
+
+ byte[] bytes1 = value1.getBytes(StandardCharsets.UTF_8);
+ byte[] bytes2 = value2.getBytes(StandardCharsets.UTF_8);
+ ArrowBuf temp = allocator.buffer(bytes1.length > bytes2.length ? bytes1.length : bytes2.length);
+
+ writer.start();
+ temp.setBytes(0, bytes1);
+ writer.varChar("f0").writeVarChar(0, bytes1.length, temp);
+ temp.setBytes(0, bytes2);
+ writer.varChar("f1").writeVarChar(0, bytes2.length, temp);
+ writer.end();
+ temp.close();
+ }
+
+ private void writeStructVector(NullableStructWriter writer, int value1, long value2) {
+ writer.start();
+ writer.integer("f0").writeInt(value1);
+ writer.bigInt("f1").writeBigInt(value2);
+ writer.end();
+ }
+
+ private void writeListVector(UnionListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java
new file mode 100644
index 000000000..8ae876f20
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.time.Duration;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableDurationHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDurationVector {
+ RootAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() {
+ allocator.close();
+ }
+
+ @Test
+ public void testSecBasics() {
+ try (DurationVector secVector = TestUtils.newVector(DurationVector.class, "second",
+ new ArrowType.Duration(TimeUnit.SECOND), allocator)) {
+
+ secVector.allocateNew();
+ secVector.setNull(0);
+ secVector.setSafe(1, 1000);
+ secVector.setValueCount(2);
+ assertNull(secVector.getObject(0));
+ assertEquals(Duration.ofSeconds(1000), secVector.getObject(1));
+ assertNull(secVector.getAsStringBuilder(0));
+ assertEquals("PT16M40S", secVector.getAsStringBuilder(1).toString());
+ // Holder
+ NullableDurationHolder holder = new NullableDurationHolder();
+ secVector.get(0, holder);
+ assertEquals(0, holder.isSet);
+ secVector.get(1, holder);
+ assertEquals(1 , holder.isSet);
+ assertEquals(1000 , holder.value);
+ }
+ }
+
+ @Test
+ public void testMilliBasics() {
+ try (DurationVector milliVector = TestUtils.newVector(DurationVector.class, "nanos",
+ new ArrowType.Duration(TimeUnit.MILLISECOND), allocator)) {
+
+ milliVector.allocateNew();
+ milliVector.setNull(0);
+ milliVector.setSafe(1, 1000);
+ milliVector.setValueCount(2);
+ assertNull(milliVector.getObject(0));
+ assertEquals(Duration.ofSeconds(1), milliVector.getObject(1));
+ assertNull(milliVector.getAsStringBuilder(0));
+ assertEquals("PT1S", milliVector.getAsStringBuilder(1).toString());
+ // Holder
+ NullableDurationHolder holder = new NullableDurationHolder();
+ milliVector.get(0, holder);
+ assertEquals(0, holder.isSet);
+ milliVector.get(1, holder);
+ assertEquals(1 , holder.isSet);
+ assertEquals(1000 , holder.value);
+ }
+ }
+
+ @Test
+ public void testMicroBasics() {
+ try (DurationVector microVector = TestUtils.newVector(DurationVector.class, "micro",
+ new ArrowType.Duration(TimeUnit.MICROSECOND), allocator)) {
+
+ microVector.allocateNew();
+ microVector.setNull(0);
+ microVector.setSafe(1, 1000);
+ microVector.setValueCount(2);
+ assertNull(microVector.getObject(0));
+ assertEquals(Duration.ofMillis(1), microVector.getObject(1));
+ assertNull(microVector.getAsStringBuilder(0));
+ assertEquals("PT0.001S", microVector.getAsStringBuilder(1).toString());
+ // Holder
+ NullableDurationHolder holder = new NullableDurationHolder();
+ microVector.get(0, holder);
+ assertEquals(0, holder.isSet);
+ microVector.get(1, holder);
+ assertEquals(1 , holder.isSet);
+ assertEquals(1000 , holder.value);
+ }
+ }
+
+ @Test
+ public void testNanosBasics() {
+ try (DurationVector nanoVector = TestUtils.newVector(DurationVector.class, "nanos",
+ new ArrowType.Duration(TimeUnit.NANOSECOND), allocator)) {
+
+ nanoVector.allocateNew();
+ nanoVector.setNull(0);
+ nanoVector.setSafe(1, 1000000);
+ nanoVector.setValueCount(2);
+ assertNull(nanoVector.getObject(0));
+ assertEquals(Duration.ofMillis(1), nanoVector.getObject(1));
+ assertNull(nanoVector.getAsStringBuilder(0));
+ assertEquals("PT0.001S", nanoVector.getAsStringBuilder(1).toString());
+ // Holder
+ NullableDurationHolder holder = new NullableDurationHolder();
+ nanoVector.get(0, holder);
+ assertEquals(0, holder.isSet);
+ nanoVector.get(1, holder);
+ assertEquals(1 , holder.isSet);
+ assertEquals(1000000 , holder.value);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
new file mode 100644
index 000000000..363821e98
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
+import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFixedSizeBinaryVector {
+ private static final int numValues = 123;
+ private static final int typeWidth = 9;
+ private static final int smallDataSize = 6;
+ private static final int largeDataSize = 12;
+
+ private static byte[][] values;
+
+ static {
+ values = new byte[numValues][typeWidth];
+ for (int i = 0; i < numValues; i++) {
+ for (int j = 0; j < typeWidth; j++) {
+ values[i][j] = ((byte) i);
+ }
+ }
+ }
+
+ private ArrowBuf[] bufs = new ArrowBuf[numValues];
+ private FixedSizeBinaryHolder[] holders = new FixedSizeBinaryHolder[numValues];
+ private NullableFixedSizeBinaryHolder[] nullableHolders = new NullableFixedSizeBinaryHolder[numValues];
+
+ private static byte[] smallValue;
+
+ static {
+ smallValue = new byte[smallDataSize];
+ for (int i = 0; i < smallDataSize; i++) {
+ smallValue[i] = ((byte) i);
+ }
+ }
+
+ private ArrowBuf smallBuf;
+ private FixedSizeBinaryHolder smallHolder;
+ private NullableFixedSizeBinaryHolder smallNullableHolder;
+
+ private static byte[] largeValue;
+
+ static {
+ largeValue = new byte[largeDataSize];
+ for (int i = 0; i < largeDataSize; i++) {
+ largeValue[i] = ((byte) i);
+ }
+ }
+
+ private ArrowBuf largeBuf;
+ private FixedSizeBinaryHolder largeHolder;
+ private NullableFixedSizeBinaryHolder largeNullableHolder;
+
+ private BufferAllocator allocator;
+ private FixedSizeBinaryVector vector;
+
+ private static void failWithException(String message) throws Exception {
+ throw new Exception(message);
+ }
+
+
+ @Before
+ public void init() throws Exception {
+ allocator = new DirtyRootAllocator(Integer.MAX_VALUE, (byte) 100);
+ vector = new FixedSizeBinaryVector("fixedSizeBinary", allocator, typeWidth);
+ vector.allocateNew();
+
+ for (int i = 0; i < numValues; i++) {
+ bufs[i] = allocator.buffer(typeWidth);
+ bufs[i].setBytes(0, values[i]);
+
+ holders[i] = new FixedSizeBinaryHolder();
+ holders[i].byteWidth = typeWidth;
+ holders[i].buffer = bufs[i];
+
+ nullableHolders[i] = new NullableFixedSizeBinaryHolder();
+ nullableHolders[i].byteWidth = typeWidth;
+ nullableHolders[i].buffer = bufs[i];
+ nullableHolders[i].isSet = 1;
+ }
+
+ smallBuf = allocator.buffer(smallDataSize);
+ smallBuf.setBytes(0, smallValue);
+
+ smallHolder = new FixedSizeBinaryHolder();
+ smallHolder.byteWidth = smallDataSize;
+ smallHolder.buffer = smallBuf;
+
+ smallNullableHolder = new NullableFixedSizeBinaryHolder();
+ smallNullableHolder.byteWidth = smallDataSize;
+ smallNullableHolder.buffer = smallBuf;
+
+ largeBuf = allocator.buffer(largeDataSize);
+ largeBuf.setBytes(0, largeValue);
+
+ largeHolder = new FixedSizeBinaryHolder();
+ largeHolder.byteWidth = typeWidth;
+ largeHolder.buffer = largeBuf;
+
+ largeNullableHolder = new NullableFixedSizeBinaryHolder();
+ largeNullableHolder.byteWidth = typeWidth;
+ largeNullableHolder.buffer = largeBuf;
+ }
+
+ @After
+ public void terminate() throws Exception {
+ for (int i = 0; i < numValues; i++) {
+ bufs[i].close();
+ }
+ smallBuf.close();
+ largeBuf.close();
+
+ vector.close();
+ allocator.close();
+ }
+
+ @Test
+ public void testSetUsingByteArray() {
+ for (int i = 0; i < numValues; i++) {
+ vector.set(i, values[i]);
+ }
+ vector.setValueCount(numValues);
+ for (int i = 0; i < numValues; i++) {
+ assertArrayEquals(values[i], vector.getObject(i));
+ }
+ }
+
+ @Test
+ public void testSetUsingNull() {
+ final byte[] value = null;
+ for (int i = 0; i < numValues; i++) {
+ final int index = i;
+ Exception e = assertThrows(NullPointerException.class, () -> {
+ vector.set(index, value);
+ });
+ assertEquals("expecting a valid byte array", e.getMessage());
+ }
+ }
+
+ @Test
+ public void testSetUsingHolder() {
+ for (int i = 0; i < numValues; i++) {
+ vector.set(i, holders[i]);
+ }
+ vector.setValueCount(numValues);
+ for (int i = 0; i < numValues; i++) {
+ assertArrayEquals(values[i], vector.getObject(i));
+ }
+ }
+
+ @Test
+ public void testSetUsingNullableHolder() {
+ for (int i = 0; i < numValues; i++) {
+ vector.set(i, nullableHolders[i]);
+ }
+ vector.setValueCount(numValues);
+ for (int i = 0; i < numValues; i++) {
+ assertArrayEquals(values[i], vector.getObject(i));
+ }
+ }
+
+ @Test
+ public void testGetUsingNullableHolder() {
+ for (int i = 0; i < numValues; i++) {
+ vector.set(i, holders[i]);
+ }
+ vector.setValueCount(numValues);
+ for (int i = 0; i < numValues; i++) {
+ vector.get(i, nullableHolders[i]);
+ assertEquals(typeWidth, nullableHolders[i].byteWidth);
+ assertTrue(nullableHolders[i].isSet > 0);
+ byte[] actual = new byte[typeWidth];
+ nullableHolders[i].buffer.getBytes(0, actual, 0, typeWidth);
+ assertArrayEquals(values[i], actual);
+ }
+ }
+
+ @Test
+ public void testSetWithInvalidInput() throws Exception {
+ String errorMsg = "input data needs to be at least " + typeWidth + " bytes";
+
+ // test small inputs, byteWidth matches but value or buffer is too small
+ try {
+ vector.set(0, smallValue);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.set(0, smallHolder);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.set(0, smallNullableHolder);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.set(0, smallBuf);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ // test large inputs, byteWidth matches but value or buffer is bigger than byteWidth
+ vector.set(0, largeValue);
+ vector.set(0, largeHolder);
+ vector.set(0, largeNullableHolder);
+ vector.set(0, largeBuf);
+ }
+
+ @Test
+ public void setSetSafeWithInvalidInput() throws Exception {
+ String errorMsg = "input data needs to be at least " + typeWidth + " bytes";
+
+ // test small inputs, byteWidth matches but value or buffer is too small
+ try {
+ vector.setSafe(0, smallValue);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.setSafe(0, smallHolder);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.setSafe(0, smallNullableHolder);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.setSafe(0, smallBuf);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ // test large inputs, byteWidth matches but value or buffer is bigger than byteWidth
+ vector.setSafe(0, largeValue);
+ vector.setSafe(0, largeHolder);
+ vector.setSafe(0, largeNullableHolder);
+ vector.setSafe(0, largeBuf);
+ }
+
+ @Test
+ public void testGetNull() {
+ vector.setNull(0);
+ assertNull(vector.get(0));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
new file mode 100644
index 000000000..9d7e413a7
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
@@ -0,0 +1,507 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.math.BigDecimal;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
+import org.apache.arrow.vector.complex.impl.UnionListReader;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFixedSizeListVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testIntType() {
+ try (FixedSizeListVector vector = FixedSizeListVector.empty("list", 2, allocator)) {
+ IntVector nested = (IntVector) vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())).getVector();
+ vector.allocateNew();
+
+ for (int i = 0; i < 10; i++) {
+ vector.setNotNull(i);
+ nested.set(i * 2, i);
+ nested.set(i * 2 + 1, i + 10);
+ }
+ vector.setValueCount(10);
+
+ UnionFixedSizeListReader reader = vector.getReader();
+ for (int i = 0; i < 10; i++) {
+ reader.setPosition(i);
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(i, reader.reader().readInteger().intValue());
+ Assert.assertTrue(reader.next());
+ assertEquals(i + 10, reader.reader().readInteger().intValue());
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(i, i + 10), reader.readObject());
+ }
+ }
+ }
+
+ @Test
+ public void testFloatTypeNullable() {
+ try (FixedSizeListVector vector = FixedSizeListVector.empty("list", 2, allocator)) {
+ Float4Vector nested = (Float4Vector) vector.addOrGetVector(FieldType.nullable(MinorType.FLOAT4.getType()))
+ .getVector();
+ vector.allocateNew();
+
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector.setNotNull(i);
+ nested.set(i * 2, i + 0.1f);
+ nested.set(i * 2 + 1, i + 10.1f);
+ }
+ }
+ vector.setValueCount(10);
+
+ UnionFixedSizeListReader reader = vector.getReader();
+ for (int i = 0; i < 10; i++) {
+ reader.setPosition(i);
+ if (i % 2 == 0) {
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(i + 0.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertTrue(reader.next());
+ assertEquals(i + 10.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(i + 0.1f, i + 10.1f), reader.readObject());
+ } else {
+ Assert.assertFalse(reader.isSet());
+ Assert.assertNull(reader.readObject());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testNestedInList() {
+ try (ListVector vector = ListVector.empty("list", allocator)) {
+ FixedSizeListVector tuples = (FixedSizeListVector) vector.addOrGetVector(
+ FieldType.nullable(new ArrowType.FixedSizeList(2))).getVector();
+ IntVector innerVector = (IntVector) tuples.addOrGetVector(FieldType.nullable(MinorType.INT.getType()))
+ .getVector();
+ vector.allocateNew();
+
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ int position = vector.startNewValue(i);
+ for (int j = 0; j < i % 7; j++) {
+ tuples.setNotNull(position + j);
+ innerVector.set((position + j) * 2, j);
+ innerVector.set((position + j) * 2 + 1, j + 1);
+ }
+ vector.endValue(i, i % 7);
+ }
+ }
+ vector.setValueCount(10);
+
+ UnionListReader reader = vector.getReader();
+ for (int i = 0; i < 10; i++) {
+ reader.setPosition(i);
+ if (i % 2 == 0) {
+ for (int j = 0; j < i % 7; j++) {
+ Assert.assertTrue(reader.next());
+ FieldReader innerListReader = reader.reader();
+ for (int k = 0; k < 2; k++) {
+ Assert.assertTrue(innerListReader.next());
+ assertEquals(k + j, innerListReader.reader().readInteger().intValue());
+ }
+ Assert.assertFalse(innerListReader.next());
+ }
+ Assert.assertFalse(reader.next());
+ } else {
+ Assert.assertFalse(reader.isSet());
+ Assert.assertNull(reader.readObject());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testTransferPair() {
+ try (FixedSizeListVector from = new FixedSizeListVector(
+ "from", allocator, new FieldType(true, new ArrowType.FixedSizeList(2), null), null);
+ FixedSizeListVector to = new FixedSizeListVector(
+ "to", allocator, new FieldType(true, new ArrowType.FixedSizeList(2), null), null)) {
+ Float4Vector nested = (Float4Vector) from.addOrGetVector(FieldType.nullable(MinorType.FLOAT4.getType()))
+ .getVector();
+ from.allocateNew();
+
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ from.setNotNull(i);
+ nested.set(i * 2, i + 0.1f);
+ nested.set(i * 2 + 1, i + 10.1f);
+ }
+ }
+ from.setValueCount(10);
+
+ TransferPair pair = from.makeTransferPair(to);
+
+ pair.copyValueSafe(0, 1);
+ pair.copyValueSafe(2, 2);
+ to.copyFromSafe(4, 3, from);
+
+ to.setValueCount(10);
+
+ UnionFixedSizeListReader reader = to.getReader();
+
+ reader.setPosition(0);
+ Assert.assertFalse(reader.isSet());
+ Assert.assertNull(reader.readObject());
+
+ reader.setPosition(1);
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(0.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertTrue(reader.next());
+ assertEquals(10.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(0.1f, 10.1f), reader.readObject());
+
+ reader.setPosition(2);
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(2.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertTrue(reader.next());
+ assertEquals(12.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(2.1f, 12.1f), reader.readObject());
+
+ reader.setPosition(3);
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(4.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertTrue(reader.next());
+ assertEquals(14.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(4.1f, 14.1f), reader.readObject());
+
+ for (int i = 4; i < 10; i++) {
+ reader.setPosition(i);
+ Assert.assertFalse(reader.isSet());
+ Assert.assertNull(reader.readObject());
+ }
+ }
+ }
+
+ @Test
+ public void testConsistentChildName() throws Exception {
+ try (FixedSizeListVector listVector = FixedSizeListVector.empty("sourceVector", 2, allocator)) {
+ String emptyListStr = listVector.getField().toString();
+ Assert.assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME));
+
+ listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+ String emptyVectorStr = listVector.getField().toString();
+ Assert.assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME));
+ }
+ }
+
+ @Test
+ public void testUnionFixedSizeListWriterWithNulls() throws Exception {
+ /* Write to a decimal list vector
+ * each list of size 3 and having its data values alternating between null and a non-null.
+ * Read and verify
+ */
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/3, allocator)) {
+
+ UnionFixedSizeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ final int valueCount = 100;
+
+ for (int i = 0; i < valueCount; i++) {
+ writer.startList();
+ writer.decimal().writeDecimal(new BigDecimal(i));
+ writer.writeNull();
+ writer.decimal().writeDecimal(new BigDecimal(i * 3));
+ writer.endList();
+ }
+ vector.setValueCount(valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ List<BigDecimal> values = (List<BigDecimal>) vector.getObject(i);
+ assertEquals(3, values.size());
+ assertEquals(new BigDecimal(i), values.get(0));
+ assertEquals(null, values.get(1));
+ assertEquals(new BigDecimal(i * 3), values.get(2));
+ }
+ }
+ }
+
+ @Test
+ public void testUnionFixedSizeListWriter() throws Exception {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 3, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ int[] values1 = new int[] {1, 2, 3};
+ int[] values2 = new int[] {4, 5, 6};
+ int[] values3 = new int[] {7, 8, 9};
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writeListVector(vector1, writer1, values3);
+ writer1.setValueCount(3);
+
+ assertEquals(3, vector1.getValueCount());
+
+ int[] realValue1 = convertListToIntArray(vector1.getObject(0));
+ assertTrue(Arrays.equals(values1, realValue1));
+ int[] realValue2 = convertListToIntArray(vector1.getObject(1));
+ assertTrue(Arrays.equals(values2, realValue2));
+ int[] realValue3 = convertListToIntArray(vector1.getObject(2));
+ assertTrue(Arrays.equals(values3, realValue3));
+ }
+ }
+
+ @Test
+ public void testWriteDecimal() throws Exception {
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/3, allocator)) {
+
+ UnionFixedSizeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ final int valueCount = 100;
+
+ for (int i = 0; i < valueCount; i++) {
+ writer.startList();
+ writer.decimal().writeDecimal(new BigDecimal(i));
+ writer.decimal().writeDecimal(new BigDecimal(i * 2));
+ writer.decimal().writeDecimal(new BigDecimal(i * 3));
+ writer.endList();
+ }
+ vector.setValueCount(valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ List<BigDecimal> values = (List<BigDecimal>) vector.getObject(i);
+ assertEquals(3, values.size());
+ assertEquals(new BigDecimal(i), values.get(0));
+ assertEquals(new BigDecimal(i * 2), values.get(1));
+ assertEquals(new BigDecimal(i * 3), values.get(2));
+ }
+ }
+ }
+
+ @Test
+ public void testDecimalIndexCheck() throws Exception {
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/3, allocator)) {
+
+ UnionFixedSizeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ IllegalStateException e = assertThrows(IllegalStateException.class, () -> {
+ writer.startList();
+ writer.decimal().writeDecimal(new BigDecimal(1));
+ writer.decimal().writeDecimal(new BigDecimal(2));
+ writer.decimal().writeDecimal(new BigDecimal(3));
+ writer.decimal().writeDecimal(new BigDecimal(4));
+ writer.endList();
+ });
+ assertEquals("values at index 0 is greater than listSize 3", e.getMessage());
+ }
+ }
+
+
+ @Test(expected = IllegalStateException.class)
+ public void testWriteIllegalData() throws Exception {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 3, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ int[] values1 = new int[] {1, 2, 3};
+ int[] values2 = new int[] {4, 5, 6, 7, 8};
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writer1.setValueCount(3);
+
+ assertEquals(3, vector1.getValueCount());
+ int[] realValue1 = convertListToIntArray(vector1.getObject(0));
+ assertTrue(Arrays.equals(values1, realValue1));
+ int[] realValue2 = convertListToIntArray(vector1.getObject(1));
+ assertTrue(Arrays.equals(values2, realValue2));
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 3, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ int[] values1 = new int[] {1, 2, 3};
+ int[] values2 = new int[] {4, 5, 6};
+ int[] values3 = new int[] {7, 8, 9};
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writeListVector(vector1, writer1, values3);
+ writer1.setValueCount(3);
+
+ TransferPair transferPair = vector1.getTransferPair(allocator);
+ transferPair.splitAndTransfer(0, 2);
+ FixedSizeListVector targetVector = (FixedSizeListVector) transferPair.getTo();
+
+ assertEquals(2, targetVector.getValueCount());
+ int[] realValue1 = convertListToIntArray(targetVector.getObject(0));
+ assertTrue(Arrays.equals(values1, realValue1));
+ int[] realValue2 = convertListToIntArray(targetVector.getObject(1));
+ assertTrue(Arrays.equals(values2, realValue2));
+
+ targetVector.clear();
+ }
+ }
+
+ @Test
+ public void testZeroWidthVector() {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 0, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ int[] values1 = new int[] {};
+ int[] values2 = new int[] {};
+ int[] values3 = null;
+ int[] values4 = new int[] {};
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writeListVector(vector1, writer1, values3);
+ writeListVector(vector1, writer1, values4);
+ writer1.setValueCount(4);
+
+ assertEquals(4, vector1.getValueCount());
+
+ int[] realValue1 = convertListToIntArray(vector1.getObject(0));
+ assertArrayEquals(values1, realValue1);
+ int[] realValue2 = convertListToIntArray(vector1.getObject(1));
+ assertArrayEquals(values2, realValue2);
+ assertNull(vector1.getObject(2));
+ int[] realValue4 = convertListToIntArray(vector1.getObject(3));
+ assertArrayEquals(values4, realValue4);
+ }
+ }
+
+ @Test
+ public void testVectorWithNulls() {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 4, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ List<Integer> values1 = Arrays.asList(null, 1, 2, 3);
+ List<Integer> values2 = Arrays.asList(4, null, 5, 6);
+ List<Integer> values3 = null;
+ List<Integer> values4 = Arrays.asList(7, 8, null, 9);
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writeListVector(vector1, writer1, values3);
+ writeListVector(vector1, writer1, values4);
+ writer1.setValueCount(4);
+
+ assertEquals(4, vector1.getValueCount());
+
+ List realValue1 = vector1.getObject(0);
+ assertEquals(values1, realValue1);
+ List realValue2 = vector1.getObject(1);
+ assertEquals(values2, realValue2);
+ List realValue3 = vector1.getObject(2);
+ assertEquals(values3, realValue3);
+ List realValue4 = vector1.getObject(3);
+ assertEquals(values4, realValue4);
+ }
+ }
+
+ private int[] convertListToIntArray(List list) {
+ int[] values = new int[list.size()];
+ for (int i = 0; i < list.size(); i++) {
+ values[i] = (int) list.get(i);
+ }
+ return values;
+ }
+
+ private void writeListVector(FixedSizeListVector vector, UnionFixedSizeListWriter writer, int[] values) {
+ writer.startList();
+ if (values != null) {
+ for (int v : values) {
+ writer.integer().writeInt(v);
+ }
+ } else {
+ vector.setNull(writer.getPosition());
+ }
+ writer.endList();
+ }
+
+ private void writeListVector(FixedSizeListVector vector, UnionFixedSizeListWriter writer, List<Integer> values) {
+ writer.startList();
+ if (values != null) {
+ for (Integer v : values) {
+ if (v == null) {
+ writer.writeNull();
+ } else {
+ writer.integer().writeInt(v);
+ }
+ }
+ } else {
+ vector.setNull(writer.getPosition());
+ }
+ writer.endList();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java
new file mode 100644
index 000000000..93d6fab70
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+
+
+import java.time.Duration;
+import java.time.Period;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.holders.IntervalMonthDayNanoHolder;
+import org.apache.arrow.vector.holders.NullableIntervalMonthDayNanoHolder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestIntervalMonthDayNanoVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testBasics() {
+ try (final IntervalMonthDayNanoVector vector = new IntervalMonthDayNanoVector(/*name=*/"", allocator)) {
+ int valueCount = 100;
+ vector.setInitialCapacity(valueCount);
+ vector.allocateNew();
+ NullableIntervalMonthDayNanoHolder nullableHolder = new NullableIntervalMonthDayNanoHolder();
+ nullableHolder.isSet = 1;
+ nullableHolder.months = 2;
+ nullableHolder.days = 20;
+ nullableHolder.nanoseconds = 123;
+ IntervalMonthDayNanoHolder holder = new IntervalMonthDayNanoHolder();
+ holder.months = Integer.MIN_VALUE;
+ holder.days = Integer.MIN_VALUE;
+ holder.nanoseconds = Long.MIN_VALUE;
+
+
+ vector.set(0, /*months=*/1, /*days=*/2, /*nanoseconds=*/-2);
+ vector.setSafe(2, /*months=*/1, /*days=*/2, /*nanoseconds=*/-3);
+ vector.setSafe(/*index=*/4, nullableHolder);
+ vector.set(3, holder);
+ nullableHolder.isSet = 0;
+ vector.setSafe(/*index=*/5, nullableHolder);
+ vector.setValueCount(5);
+
+ assertEquals("P1M2D PT-0.000000002S ", vector.getAsStringBuilder(0).toString());
+ assertEquals(null, vector.getAsStringBuilder(1));
+ assertEquals("P1M2D PT-0.000000003S ", vector.getAsStringBuilder(2).toString());
+ assertEquals(new PeriodDuration(Period.of(0, Integer.MIN_VALUE, Integer.MIN_VALUE),
+ Duration.ofNanos(Long.MIN_VALUE)), vector.getObject(3));
+ assertEquals("P2M20D PT0.000000123S ", vector.getAsStringBuilder(4).toString());
+
+ assertEquals(null, vector.getObject(5));
+
+ vector.get(1, nullableHolder);
+ assertEquals(0, nullableHolder.isSet);
+
+ vector.get(2, nullableHolder);
+ assertEquals(1, nullableHolder.isSet);
+ assertEquals(1, nullableHolder.months);
+ assertEquals(2, nullableHolder.days);
+ assertEquals(-3, nullableHolder.nanoseconds);
+
+ IntervalMonthDayNanoVector.getDays(vector.valueBuffer, 2);
+ assertEquals(1, IntervalMonthDayNanoVector.getMonths(vector.valueBuffer, 2));
+ assertEquals(2, IntervalMonthDayNanoVector.getDays(vector.valueBuffer, 2));
+ assertEquals(-3, IntervalMonthDayNanoVector.getNanoseconds(vector.valueBuffer, 2));
+
+ assertEquals(0, vector.isSet(1));
+ assertEquals(1, vector.isSet(2));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java
new file mode 100644
index 000000000..5ea48b485
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestIntervalYearVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testGetAsStringBuilder() {
+ try (final IntervalYearVector vector = new IntervalYearVector("", allocator)) {
+ int valueCount = 100;
+ vector.setInitialCapacity(valueCount);
+ vector.allocateNew();
+ for (int i = 0; i < valueCount; i++) {
+ vector.set(i, i);
+ }
+
+ assertEquals("0 years 1 month ", vector.getAsStringBuilder(1).toString());
+ assertEquals("0 years 10 months ", vector.getAsStringBuilder(10).toString());
+ assertEquals("1 year 8 months ", vector.getAsStringBuilder(20).toString());
+ assertEquals("2 years 6 months ", vector.getAsStringBuilder(30).toString());
+
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
new file mode 100644
index 000000000..c1d60da4d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
@@ -0,0 +1,982 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionLargeListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestLargeListVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testCopyFrom() throws Exception {
+ try (LargeListVector inVector = LargeListVector.empty("input", allocator);
+ LargeListVector outVector = LargeListVector.empty("output", allocator)) {
+ UnionLargeListWriter writer = inVector.getWriter();
+ writer.allocate();
+
+ // populate input vector with the following records
+ // [1, 2, 3]
+ // null
+ // []
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.bigInt().writeBigInt(1);
+ writer.bigInt().writeBigInt(2);
+ writer.bigInt().writeBigInt(3);
+ writer.endList();
+
+ writer.setPosition(2);
+ writer.startList();
+ writer.endList();
+
+ writer.setValueCount(3);
+
+ // copy values from input to output
+ outVector.allocateNew();
+ for (int i = 0; i < 3; i++) {
+ outVector.copyFrom(i, i, inVector);
+ }
+ outVector.setValueCount(3);
+
+ // assert the output vector is correct
+ FieldReader reader = outVector.getReader();
+ Assert.assertTrue("shouldn't be null", reader.isSet());
+ reader.setPosition(1);
+ Assert.assertFalse("should be null", reader.isSet());
+ reader.setPosition(2);
+ Assert.assertTrue("shouldn't be null", reader.isSet());
+
+
+ /* index 0 */
+ Object result = outVector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(3, resultSet.size());
+ assertEquals(new Long(1), resultSet.get(0));
+ assertEquals(new Long(2), resultSet.get(1));
+ assertEquals(new Long(3), resultSet.get(2));
+
+ /* index 1 */
+ result = outVector.getObject(1);
+ assertNull(result);
+
+ /* index 2 */
+ result = outVector.getObject(2);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(0, resultSet.size());
+
+ /* 3+0+0/3 */
+ assertEquals(1.0D, inVector.getDensity(), 0);
+ }
+ }
+
+ @Test
+ public void testSetLastSetUsage() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("input", allocator)) {
+
+ /* Explicitly add the dataVector */
+ MinorType type = MinorType.BIGINT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ /* allocate memory */
+ listVector.allocateNew();
+
+ /* get inner buffers; validityBuffer and offsetBuffer */
+
+ ArrowBuf validityBuffer = listVector.getValidityBuffer();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* get the underlying data vector -- BigIntVector */
+ BigIntVector dataVector = (BigIntVector) listVector.getDataVector();
+
+ /* check current lastSet */
+ assertEquals(-1L, listVector.getLastSet());
+
+ int index = 0;
+ int offset = 0;
+
+ /* write [10, 11, 12] to the list vector at index 0 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(0, 1, 10);
+ dataVector.setSafe(1, 1, 11);
+ dataVector.setSafe(2, 1, 12);
+ offsetBuffer.setLong((index + 1) * LargeListVector.OFFSET_WIDTH, 3);
+
+ index += 1;
+
+ /* write [13, 14] to the list vector at index 1 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(3, 1, 13);
+ dataVector.setSafe(4, 1, 14);
+ offsetBuffer.setLong((index + 1) * LargeListVector.OFFSET_WIDTH, 5);
+
+ index += 1;
+
+ /* write [15, 16, 17] to the list vector at index 2 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(5, 1, 15);
+ dataVector.setSafe(6, 1, 16);
+ dataVector.setSafe(7, 1, 17);
+ offsetBuffer.setLong((index + 1) * LargeListVector.OFFSET_WIDTH, 8);
+
+ /* check current lastSet */
+ assertEquals(-1L, listVector.getLastSet());
+
+ /* set lastset and arbitrary valuecount for list vector.
+ *
+ * NOTE: if we don't execute setLastSet() before setLastValueCount(), then
+ * the latter will corrupt the offsetBuffer and thus the accessor will not
+ * retrieve the correct values from underlying dataBuffer. Run the test
+ * by commenting out next line and we should see failures from 5th assert
+ * onwards. This is why doing setLastSet() is important before setValueCount()
+ * once the vector has been loaded.
+ *
+ * Another important thing to remember is the value of lastSet itself.
+ * Even though the listVector has elements till index 2 only, the lastSet should
+ * be set as 3. This is because the offsetBuffer has valid offsets filled till index 3.
+ * If we do setLastSet(2), the offsetBuffer at index 3 will contain incorrect value
+ * after execution of setValueCount().
+ *
+ * correct state of the listVector
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 3, 5, 8, 8, 8.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ *
+ * if we don't do setLastSet() before setValueCount --> incorrect state
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 0, 0, 0, 0, 0.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ *
+ * if we do setLastSet(2) before setValueCount --> incorrect state
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 3, 5, 5, 5, 5.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ */
+ listVector.setLastSet(2);
+ listVector.setValueCount(10);
+
+ /* (3+2+3)/10 */
+ assertEquals(0.8D, listVector.getDensity(), 0);
+
+ index = 0;
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ Long actual = dataVector.getObject(offset);
+ assertEquals(new Long(10), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(11), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(12), actual);
+
+ index++;
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(13), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(14), actual);
+
+ index++;
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(15), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(16), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(17), actual);
+
+ index++;
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(8), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertNull(actual);
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+
+ /* Explicitly add the dataVector */
+ MinorType type = MinorType.BIGINT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ UnionLargeListWriter listWriter = listVector.getWriter();
+
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* populate data */
+ listWriter.setPosition(0);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(10);
+ listWriter.bigInt().writeBigInt(11);
+ listWriter.bigInt().writeBigInt(12);
+ listWriter.endList();
+
+ listWriter.setPosition(1);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(13);
+ listWriter.bigInt().writeBigInt(14);
+ listWriter.endList();
+
+ listWriter.setPosition(2);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(15);
+ listWriter.bigInt().writeBigInt(16);
+ listWriter.bigInt().writeBigInt(17);
+ listWriter.bigInt().writeBigInt(18);
+ listWriter.endList();
+
+ listWriter.setPosition(3);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(19);
+ listWriter.endList();
+
+ listWriter.setPosition(4);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(20);
+ listWriter.bigInt().writeBigInt(21);
+ listWriter.bigInt().writeBigInt(22);
+ listWriter.bigInt().writeBigInt(23);
+ listWriter.endList();
+
+ listVector.setValueCount(5);
+
+ assertEquals(4, listVector.getLastSet());
+
+ /* get offset buffer */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* get dataVector */
+ BigIntVector dataVector = (BigIntVector) listVector.getDataVector();
+
+ /* check the vector output */
+
+ int index = 0;
+ int offset = 0;
+ Long actual = null;
+
+ /* index 0 */
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(10), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(11), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(12), actual);
+
+ /* index 1 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(13), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(14), actual);
+
+ /* index 2 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(15), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(16), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(17), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(18), actual);
+
+ /* index 3 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(9), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(19), actual);
+
+ /* index 4 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(10), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(20), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(21), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(22), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(23), actual);
+
+ /* index 5 */
+ index++;
+ assertTrue(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(14), Integer.toString(offset));
+
+ /* do split and transfer */
+ try (LargeListVector toVector = LargeListVector.empty("toVector", allocator)) {
+
+ TransferPair transferPair = listVector.makeTransferPair(toVector);
+
+ int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}};
+
+ for (final int[] transferLength : transferLengths) {
+ int start = transferLength[0];
+ int splitLength = transferLength[1];
+
+ int dataLength1 = 0;
+ int dataLength2 = 0;
+
+ int offset1 = 0;
+ int offset2 = 0;
+
+ transferPair.splitAndTransfer(start, splitLength);
+
+ /* get offsetBuffer of toVector */
+ final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer();
+
+ /* get dataVector of toVector */
+ BigIntVector dataVector1 = (BigIntVector) toVector.getDataVector();
+
+ for (int i = 0; i < splitLength; i++) {
+ dataLength1 = (int) offsetBuffer.getLong((start + i + 1) * LargeListVector.OFFSET_WIDTH) -
+ (int) offsetBuffer.getLong((start + i) * LargeListVector.OFFSET_WIDTH);
+ dataLength2 = (int) toOffsetBuffer.getLong((i + 1) * LargeListVector.OFFSET_WIDTH) -
+ (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH);
+
+ assertEquals("Different data lengths at index: " + i + " and start: " + start,
+ dataLength1, dataLength2);
+
+ offset1 = (int) offsetBuffer.getLong((start + i) * LargeListVector.OFFSET_WIDTH);
+ offset2 = (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH);
+
+ for (int j = 0; j < dataLength1; j++) {
+ assertEquals("Different data at indexes: " + offset1 + " and " + offset2,
+ dataVector.getObject(offset1), dataVector1.getObject(offset2));
+
+ offset1++;
+ offset2++;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testNestedLargeListVector() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+
+ UnionLargeListWriter listWriter = listVector.getWriter();
+
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* the dataVector that backs a listVector will also be a
+ * listVector for this test.
+ */
+
+ /* write one or more inner lists at index 0 */
+ listWriter.setPosition(0);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(75);
+ listWriter.list().bigInt().writeBigInt(125);
+ listWriter.list().bigInt().writeBigInt(150);
+ listWriter.list().bigInt().writeBigInt(175);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ /* write one or more inner lists at index 1 */
+ listWriter.setPosition(1);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(10);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(15);
+ listWriter.list().bigInt().writeBigInt(20);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(25);
+ listWriter.list().bigInt().writeBigInt(30);
+ listWriter.list().bigInt().writeBigInt(35);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ assertEquals(1, listVector.getLastSet());
+
+ listVector.setValueCount(2);
+
+ assertEquals(2, listVector.getValueCount());
+
+ /* get listVector value at index 0 -- the value itself is a listvector */
+ Object result = listVector.getObject(0);
+ ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result;
+ ArrayList<Long> list;
+
+ assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */
+ assertEquals(3, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(4, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+ assertEquals(new Long(150), list.get(2));
+ assertEquals(new Long(175), list.get(3));
+
+ /* get listVector value at index 1 -- the value itself is a listvector */
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<ArrayList<Long>>) result;
+
+ assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */
+ assertEquals(1, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(2, resultSet.get(1).size()); /* size of second inner list */
+ assertEquals(3, resultSet.get(2).size()); /* size of third inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(10), list.get(0));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ list = resultSet.get(2);
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(listVector.isNull(0));
+ assertFalse(listVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* listVector has 2 lists at index 0 and 3 lists at index 1 */
+ assertEquals(0, offsetBuffer.getLong(0 * LargeListVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getLong(1 * LargeListVector.OFFSET_WIDTH));
+ assertEquals(5, offsetBuffer.getLong(2 * LargeListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testNestedLargeListVector1() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+
+ MinorType listType = MinorType.LIST;
+ MinorType scalarType = MinorType.BIGINT;
+
+ listVector.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList1 = (ListVector) listVector.getDataVector();
+ innerList1.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList2 = (ListVector) innerList1.getDataVector();
+ innerList2.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList3 = (ListVector) innerList2.getDataVector();
+ innerList3.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList4 = (ListVector) innerList3.getDataVector();
+ innerList4.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList5 = (ListVector) innerList4.getDataVector();
+ innerList5.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList6 = (ListVector) innerList5.getDataVector();
+ innerList6.addOrGetVector(FieldType.nullable(scalarType.getType()));
+
+ listVector.setInitialCapacity(128);
+ }
+ }
+
+ @Test
+ public void testNestedLargeListVector2() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+ listVector.setInitialCapacity(1);
+ UnionLargeListWriter listWriter = listVector.getWriter();
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* write one or more inner lists at index 0 */
+ listWriter.setPosition(0);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(75);
+ listWriter.list().bigInt().writeBigInt(125);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ /* write one or more inner lists at index 1 */
+ listWriter.setPosition(1);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(15);
+ listWriter.list().bigInt().writeBigInt(20);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(25);
+ listWriter.list().bigInt().writeBigInt(30);
+ listWriter.list().bigInt().writeBigInt(35);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ assertEquals(1, listVector.getLastSet());
+
+ listVector.setValueCount(2);
+
+ assertEquals(2, listVector.getValueCount());
+
+ /* get listVector value at index 0 -- the value itself is a listvector */
+ Object result = listVector.getObject(0);
+ ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result;
+ ArrayList<Long> list;
+
+ assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */
+ assertEquals(3, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(2, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+
+ /* get listVector value at index 1 -- the value itself is a listvector */
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<ArrayList<Long>>) result;
+
+ assertEquals(2, resultSet.size()); /* 3 inner lists at index 1 */
+ assertEquals(2, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(3, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(listVector.isNull(0));
+ assertFalse(listVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* listVector has 2 lists at index 0 and 3 lists at index 1 */
+ assertEquals(0, offsetBuffer.getLong(0 * LargeListVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getLong(1 * LargeListVector.OFFSET_WIDTH));
+ assertEquals(4, offsetBuffer.getLong(2 * LargeListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testGetBufferAddress() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("vector", allocator)) {
+
+ UnionLargeListWriter listWriter = listVector.getWriter();
+ boolean error = false;
+
+ listWriter.allocate();
+
+ listWriter.setPosition(0);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(50);
+ listWriter.bigInt().writeBigInt(100);
+ listWriter.bigInt().writeBigInt(200);
+ listWriter.endList();
+
+ listWriter.setPosition(1);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(250);
+ listWriter.bigInt().writeBigInt(300);
+ listWriter.endList();
+
+ listVector.setValueCount(2);
+
+ /* check listVector contents */
+ Object result = listVector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(3, resultSet.size());
+ assertEquals(new Long(50), resultSet.get(0));
+ assertEquals(new Long(100), resultSet.get(1));
+ assertEquals(new Long(200), resultSet.get(2));
+
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(2, resultSet.size());
+ assertEquals(new Long(250), resultSet.get(0));
+ assertEquals(new Long(300), resultSet.get(1));
+
+ List<ArrowBuf> buffers = listVector.getFieldBuffers();
+
+ long bitAddress = listVector.getValidityBufferAddress();
+ long offsetAddress = listVector.getOffsetBufferAddress();
+
+ try {
+ long dataAddress = listVector.getDataBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(2, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+
+ /* (3+2)/2 */
+ assertEquals(2.5, listVector.getDensity(), 0);
+ }
+ }
+
+ @Test
+ public void testConsistentChildName() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+ String emptyListStr = listVector.getField().toString();
+ assertTrue(emptyListStr.contains(LargeListVector.DATA_VECTOR_NAME));
+
+ listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+ String emptyVectorStr = listVector.getField().toString();
+ assertTrue(emptyVectorStr.contains(LargeListVector.DATA_VECTOR_NAME));
+ }
+ }
+
+ @Test
+ public void testSetInitialCapacity() {
+ try (final LargeListVector vector = LargeListVector.empty("", allocator)) {
+ vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+ /**
+ * use the default multiplier of 5,
+ * 512 * 5 => 2560 * 4 => 10240 bytes => 16KB => 4096 value capacity.
+ */
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 5);
+
+ /* use density as 4 */
+ vector.setInitialCapacity(512, 4);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4);
+
+ /**
+ * inner value capacity we pass to data vector is 512 * 0.1 => 51
+ * For an int vector this is 204 bytes of memory for data buffer
+ * and 7 bytes for validity buffer.
+ * and with power of 2 allocation, we allocate 256 bytes and 8 bytes
+ * for the data buffer and validity buffer of the inner vector. Thus
+ * value capacity of inner vector is 64
+ */
+ vector.setInitialCapacity(512, 0.1);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 51);
+
+ /**
+ * inner value capacity we pass to data vector is 512 * 0.01 => 5
+ * For an int vector this is 20 bytes of memory for data buffer
+ * and 1 byte for validity buffer.
+ * and with power of 2 allocation, we allocate 32 bytes and 1 bytes
+ * for the data buffer and validity buffer of the inner vector. Thus
+ * value capacity of inner vector is 8
+ */
+ vector.setInitialCapacity(512, 0.01);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 5);
+
+ /**
+ * inner value capacity we pass to data vector is 5 * 0.1 => 0
+ * which is then rounded off to 1. So we pass value count as 1
+ * to the inner int vector.
+ * the offset buffer of the list vector is allocated for 6 values
+ * which is 24 bytes and then rounded off to 32 bytes (8 values)
+ * the validity buffer of the list vector is allocated for 5
+ * values which is 1 byte. This is why value capacity of the list
+ * vector is 7 as we take the min of validity buffer value capacity
+ * and offset buffer value capacity.
+ */
+ vector.setInitialCapacity(5, 0.1);
+ vector.allocateNew();
+ assertEquals(7, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 1);
+ }
+ }
+
+ @Test
+ public void testClearAndReuse() {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+ BigIntVector bigIntVector =
+ (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector();
+ vector.setInitialCapacity(10);
+ vector.allocateNew();
+
+ vector.startNewValue(0);
+ bigIntVector.setSafe(0, 7);
+ vector.endValue(0, 1);
+ vector.startNewValue(1);
+ bigIntVector.setSafe(1, 8);
+ vector.endValue(1, 1);
+ vector.setValueCount(2);
+
+ Object result = vector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(7), resultSet.get(0));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(8), resultSet.get(0));
+
+ // Clear and release the buffers to trigger a realloc when adding next value
+ vector.clear();
+
+ // The list vector should reuse a buffer when reallocating the offset buffer
+ vector.startNewValue(0);
+ bigIntVector.setSafe(0, 7);
+ vector.endValue(0, 1);
+ vector.startNewValue(1);
+ bigIntVector.setSafe(1, 8);
+ vector.endValue(1, 1);
+ vector.setValueCount(2);
+
+ result = vector.getObject(0);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(7), resultSet.get(0));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(8), resultSet.get(0));
+ }
+ }
+
+ @Test
+ public void testWriterGetField() {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+
+ UnionLargeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writer.startList();
+ writer.integer().writeInt(1);
+ writer.integer().writeInt(2);
+ writer.endList();
+ vector.setValueCount(2);
+
+ Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME,
+ FieldType.nullable(new ArrowType.Int(32, true)), null);
+ Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.LargeList.INSTANCE),
+ Arrays.asList(expectedDataField));
+
+ assertEquals(expectedField, writer.getField());
+ }
+ }
+
+ @Test
+ public void testClose() throws Exception {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+
+ UnionLargeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writer.startList();
+ writer.integer().writeInt(1);
+ writer.integer().writeInt(2);
+ writer.endList();
+ vector.setValueCount(2);
+
+ assertTrue(vector.getBufferSize() > 0);
+ assertTrue(vector.getDataVector().getBufferSize() > 0);
+
+ writer.close();
+ assertEquals(0, vector.getBufferSize());
+ assertEquals(0, vector.getDataVector().getBufferSize());
+ }
+ }
+
+ @Test
+ public void testGetBufferSizeFor() {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+
+ UnionLargeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writeIntValues(writer, new int[] {1, 2});
+ writeIntValues(writer, new int[] {3, 4});
+ writeIntValues(writer, new int[] {5, 6});
+ writeIntValues(writer, new int[] {7, 8, 9, 10});
+ writeIntValues(writer, new int[] {11, 12, 13, 14});
+ writer.setValueCount(5);
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ int[] indices = new int[] {0, 2, 4, 6, 10, 14};
+
+ for (int valueCount = 1; valueCount <= 5; valueCount++) {
+ int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount);
+ int offsetBufferSize = (valueCount + 1) * LargeListVector.OFFSET_WIDTH;
+
+ int expectedSize = validityBufferSize + offsetBufferSize + dataVector.getBufferSizeFor(indices[valueCount]);
+ assertEquals(expectedSize, vector.getBufferSizeFor(valueCount));
+ }
+ }
+ }
+
+ @Test
+ public void testIsEmpty() {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+ UnionLargeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ // set values [1,2], null, [], [5,6]
+ writeIntValues(writer, new int[] {1, 2});
+ writer.setPosition(2);
+ writeIntValues(writer, new int[] {});
+ writeIntValues(writer, new int[] {5, 6});
+ writer.setValueCount(4);
+
+ assertFalse(vector.isEmpty(0));
+ assertTrue(vector.isNull(1));
+ assertTrue(vector.isEmpty(1));
+ assertFalse(vector.isNull(2));
+ assertTrue(vector.isEmpty(2));
+ assertFalse(vector.isEmpty(3));
+ }
+ }
+
+ private void writeIntValues(UnionLargeListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
new file mode 100644
index 000000000..644827ce9
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestLargeVarBinaryVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testSetNullableLargeVarBinaryHolder() {
+ try (LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) {
+ vector.allocateNew(100, 10);
+
+ NullableLargeVarBinaryHolder nullHolder = new NullableLargeVarBinaryHolder();
+ nullHolder.isSet = 0;
+
+ NullableLargeVarBinaryHolder binHolder = new NullableLargeVarBinaryHolder();
+ binHolder.isSet = 1;
+
+ String str = "hello";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
+
+ vector.set(0, nullHolder);
+ vector.set(1, binHolder);
+
+ // verify results
+ assertTrue(vector.isNull(0));
+ assertEquals(str, new String(vector.get(1)));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableLargeVarBinaryHolderSafe() {
+ try (LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ NullableLargeVarBinaryHolder nullHolder = new NullableLargeVarBinaryHolder();
+ nullHolder.isSet = 0;
+
+ NullableLargeVarBinaryHolder binHolder = new NullableLargeVarBinaryHolder();
+ binHolder.isSet = 1;
+
+ String str = "hello world";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
+
+ vector.setSafe(0, binHolder);
+ vector.setSafe(1, nullHolder);
+
+ // verify results
+ assertEquals(str, new String(vector.get(0)));
+ assertTrue(vector.isNull(1));
+
+ buf.close();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
new file mode 100644
index 000000000..1b81c6b20
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
@@ -0,0 +1,816 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableLargeVarCharHolder;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+
+public class TestLargeVarCharVector {
+
+ private static final byte[] STR1 = "AAAAA1".getBytes();
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes();
+ private static final byte[] STR3 = "CCCC3".getBytes();
+ private static final byte[] STR4 = "DDDDDDDD4".getBytes();
+ private static final byte[] STR5 = "EEE5".getBytes();
+ private static final byte[] STR6 = "FFFFF6".getBytes();
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testTransfer() {
+ try (BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 1000000, 1000000);
+ BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 1000000, 1000000);
+ LargeVarCharVector v1 = new LargeVarCharVector("v1", childAllocator1);
+ LargeVarCharVector v2 = new LargeVarCharVector("v2", childAllocator2);) {
+ v1.allocateNew();
+ v1.setSafe(4094, "hello world".getBytes(), 0, 11);
+ v1.setValueCount(4001);
+
+ long memoryBeforeTransfer = childAllocator1.getAllocatedMemory();
+
+ v1.makeTransferPair(v2).transfer();
+
+ assertEquals(0, childAllocator1.getAllocatedMemory());
+ assertEquals(memoryBeforeTransfer, childAllocator2.getAllocatedMemory());
+ }
+ }
+
+ @Test
+ public void testCopyValueSafe() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator);
+ final LargeVarCharVector newLargeVarCharVector = new LargeVarCharVector("newvector", allocator)) {
+ largeVarCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);
+
+ // new vector memory is not pre-allocated, we expect copyValueSafe work fine.
+ for (int i = 0; i < valueCount; i++) {
+ tp.copyValueSafe(i, i);
+ }
+ newLargeVarCharVector.setValueCount(valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ final boolean expectedSet = (i % 3) == 0;
+ if (expectedSet) {
+ assertFalse(largeVarCharVector.isNull(i));
+ assertFalse(newLargeVarCharVector.isNull(i));
+ assertArrayEquals(largeVarCharVector.get(i), newLargeVarCharVector.get(i));
+ } else {
+ assertTrue(newLargeVarCharVector.isNull(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferNon() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator)) {
+
+ largeVarCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.getTransferPair(allocator);
+ try (LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) {
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(0, newLargeVarCharVector.getValueCount());
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferAll() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator)) {
+
+ largeVarCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.getTransferPair(allocator);
+ try (LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) {
+
+ tp.splitAndTransfer(0, valueCount);
+ assertEquals(valueCount, newLargeVarCharVector.getValueCount());
+ }
+ }
+ }
+
+ @Test
+ public void testInvalidStartIndex() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator);
+ final LargeVarCharVector newLargeVarCharVector = new LargeVarCharVector("newvector", allocator)) {
+
+ largeVarCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);
+
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> tp.splitAndTransfer(valueCount, 10));
+
+ assertEquals("Invalid startIndex: 500", e.getMessage());
+ }
+ }
+
+ @Test
+ public void testInvalidLength() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator);
+ final LargeVarCharVector newLargeVarCharVector = new LargeVarCharVector("newvector", allocator)) {
+
+ largeVarCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);
+
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> tp.splitAndTransfer(0, valueCount * 2));
+
+ assertEquals("Invalid length: 1000", e.getMessage());
+ }
+ }
+
+ @Test /* LargeVarCharVector */
+ public void testSizeOfValueBuffer() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ int valueCount = 100;
+ int currentSize = 0;
+ vector.setInitialCapacity(valueCount);
+ vector.allocateNew();
+ vector.setValueCount(valueCount);
+ for (int i = 0; i < valueCount; i++) {
+ currentSize += i;
+ vector.setSafe(i, new byte[i]);
+ }
+
+ assertEquals(currentSize, vector.sizeOfValueBuffer());
+ }
+ }
+
+ @Test
+ public void testSetLastSetUsage() {
+ final byte[] STR1 = "AAAAA1".getBytes();
+ final byte[] STR2 = "BBBBBBBBB2".getBytes();
+ final byte[] STR3 = "CCCC3".getBytes();
+ final byte[] STR4 = "DDDDDDDD4".getBytes();
+ final byte[] STR5 = "EEE5".getBytes();
+ final byte[] STR6 = "FFFFF6".getBytes();
+
+ try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {
+ vector.allocateNew(1024 * 10, 1024);
+
+ setBytes(0, STR1, vector);
+ setBytes(1, STR2, vector);
+ setBytes(2, STR3, vector);
+ setBytes(3, STR4, vector);
+ setBytes(4, STR5, vector);
+ setBytes(5, STR6, vector);
+
+ /* Check current lastSet */
+ assertEquals(-1, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ /*
+ * If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt
+ * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays.
+ * Run the test by commenting out next line and we should see incorrect vector output.
+ */
+ vector.setLastSet(5);
+ vector.setValueCount(20);
+
+ /* Check current lastSet */
+ assertEquals(19, vector.getLastSet());
+
+ /* Check the vector output again */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+ assertEquals(0, vector.getValueLength(10));
+ assertEquals(0, vector.getValueLength(11));
+ assertEquals(0, vector.getValueLength(12));
+ assertEquals(0, vector.getValueLength(13));
+ assertEquals(0, vector.getValueLength(14));
+ assertEquals(0, vector.getValueLength(15));
+ assertEquals(0, vector.getValueLength(16));
+ assertEquals(0, vector.getValueLength(17));
+ assertEquals(0, vector.getValueLength(18));
+ assertEquals(0, vector.getValueLength(19));
+
+ /* Check offsets */
+ assertEquals(0, vector.offsetBuffer.getLong(0 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(6, vector.offsetBuffer.getLong(1 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(16, vector.offsetBuffer.getLong(2 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(21, vector.offsetBuffer.getLong(3 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(30, vector.offsetBuffer.getLong(4 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(34, vector.offsetBuffer.getLong(5 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(6 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(7 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(8 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(9 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(10 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(11 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(12 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(13 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(14 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(15 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(16 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(17 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(18 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(19 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+
+ vector.set(19, STR6);
+ assertArrayEquals(STR6, vector.get(19));
+ assertEquals(40, vector.offsetBuffer.getLong(19 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(46, vector.offsetBuffer.getLong(20 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void testVectorAllocateNew() {
+ try (RootAllocator smallAllocator = new RootAllocator(200);
+ LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) {
+ vector.allocateNew();
+ }
+ }
+
+ @Test(expected = OversizedAllocationException.class)
+ public void testLargeVariableVectorReallocation() {
+ final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator);
+ // edge case 1: value count = MAX_VALUE_ALLOCATION
+ final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
+ final int expectedOffsetSize = 10;
+ try {
+ vector.allocateNew(expectedAllocationInBytes, 10);
+ assertTrue(expectedOffsetSize <= vector.getValueCapacity());
+ assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
+ vector.reAlloc();
+ assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
+ assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
+ } finally {
+ vector.close();
+ }
+
+ // common: value count < MAX_VALUE_ALLOCATION
+ try {
+ vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
+ vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
+ vector.reAlloc(); // this tests if it overflows
+ } finally {
+ vector.close();
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator)) {
+ largeVarCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ final String[] compareArray = new String[valueCount];
+
+ populateLargeVarcharVector(largeVarCharVector, valueCount, compareArray);
+
+ final TransferPair tp = largeVarCharVector.getTransferPair(allocator);
+ try (final LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) {
+ final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}};
+
+ for (final int[] startLength : startLengths) {
+ final int start = startLength[0];
+ final int length = startLength[1];
+ tp.splitAndTransfer(start, length);
+ for (int i = 0; i < length; i++) {
+ final boolean expectedSet = ((start + i) % 3) == 0;
+ if (expectedSet) {
+ final byte[] expectedValue = compareArray[start + i].getBytes();
+ assertFalse(newLargeVarCharVector.isNull(i));
+ assertArrayEquals(expectedValue, newLargeVarCharVector.get(i));
+ } else {
+ assertTrue(newLargeVarCharVector.isNull(i));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testReallocAfterVectorTransfer() {
+ final byte[] STR1 = "AAAAA1".getBytes();
+ final byte[] STR2 = "BBBBBBBBB2".getBytes();
+
+ try (final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator)) {
+ /* 4096 values with 10 byte per record */
+ vector.allocateNew(4096 * 10, 4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertTrue(valueCapacity >= 4096);
+
+ /* populate the vector */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafe(valueCapacity, STR2, 0, STR2.length);
+ assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
+ while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
+ vector.reallocDataBuffer();
+ }
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafe(valueCapacity + 10, STR2, 0, STR2.length);
+ assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
+ while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
+ vector.reallocDataBuffer();
+ }
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* we are potentially working with 4x the size of vector buffer
+ * that we initially started with. Now let's transfer the vector.
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ try (LargeVarCharVector toVector = (LargeVarCharVector) transferPair.getTo()) {
+ valueCapacity = toVector.getValueCapacity();
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, toVector.get(i));
+ } else {
+ assertArrayEquals(STR2, toVector.get(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testVectorLoadUnload() {
+ try (final LargeVarCharVector vector1 = new LargeVarCharVector("myvector", allocator)) {
+
+ ValueVectorDataPopulator.setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6);
+
+ assertEquals(5, vector1.getLastSet());
+ vector1.setValueCount(15);
+ assertEquals(14, vector1.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector1.get(0));
+ assertArrayEquals(STR2, vector1.get(1));
+ assertArrayEquals(STR3, vector1.get(2));
+ assertArrayEquals(STR4, vector1.get(3));
+ assertArrayEquals(STR5, vector1.get(4));
+ assertArrayEquals(STR6, vector1.get(5));
+
+ Field field = vector1.getField();
+ String fieldName = field.getName();
+
+ List<Field> fields = new ArrayList<>();
+ List<FieldVector> fieldVectors = new ArrayList<>();
+
+ fields.add(field);
+ fieldVectors.add(vector1);
+
+ Schema schema = new Schema(fields);
+
+ VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount());
+ VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
+
+ try (
+ ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, allocator);
+ ) {
+
+ VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
+ vectorLoader.load(recordBatch);
+
+ LargeVarCharVector vector2 = (LargeVarCharVector) schemaRoot2.getVector(fieldName);
+ /*
+ * lastSet would have internally been set by VectorLoader.load() when it invokes
+ * loadFieldBuffers.
+ */
+ assertEquals(14, vector2.getLastSet());
+ vector2.setValueCount(25);
+ assertEquals(24, vector2.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector2.get(0));
+ assertArrayEquals(STR2, vector2.get(1));
+ assertArrayEquals(STR3, vector2.get(2));
+ assertArrayEquals(STR4, vector2.get(3));
+ assertArrayEquals(STR5, vector2.get(4));
+ assertArrayEquals(STR6, vector2.get(5));
+ }
+ }
+ }
+
+ @Test
+ public void testFillEmptiesUsage() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {
+
+ vector.allocateNew(1024 * 10, 1024);
+
+ setBytes(0, STR1, vector);
+ setBytes(1, STR2, vector);
+ setBytes(2, STR3, vector);
+ setBytes(3, STR4, vector);
+ setBytes(4, STR5, vector);
+ setBytes(5, STR6, vector);
+
+ /* Check current lastSet */
+ assertEquals(-1, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ vector.setLastSet(5);
+ /* fill empty byte arrays from index [6, 9] */
+ vector.fillEmpties(10);
+
+ /* Check current lastSet */
+ assertEquals(9, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+
+ setBytes(10, STR1, vector);
+ setBytes(11, STR2, vector);
+
+ vector.setLastSet(11);
+ /* fill empty byte arrays from index [12, 14] */
+ vector.setValueCount(15);
+
+ /* Check current lastSet */
+ assertEquals(14, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+ assertArrayEquals(STR1, vector.get(10));
+ assertArrayEquals(STR2, vector.get(11));
+ assertEquals(0, vector.getValueLength(12));
+ assertEquals(0, vector.getValueLength(13));
+ assertEquals(0, vector.getValueLength(14));
+
+ /* Check offsets */
+ assertEquals(0,
+ vector.offsetBuffer.getLong(0 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(6,
+ vector.offsetBuffer.getLong(1 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(16,
+ vector.offsetBuffer.getLong(2 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(21,
+ vector.offsetBuffer.getLong(3 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(30,
+ vector.offsetBuffer.getLong(4 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(34,
+ vector.offsetBuffer.getLong(5 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(40,
+ vector.offsetBuffer.getLong(6 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getLong(7 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getLong(8 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getLong(9 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getLong(10 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(46,
+ vector.offsetBuffer.getLong(11 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getLong(12 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(56,
+ vector.offsetBuffer.getLong(13 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getLong(14 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getLong(15 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testGetBufferAddress1() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {
+
+ ValueVectorDataPopulator.setVector(vector, STR1, STR2, STR3, STR4, STR5, STR6);
+ vector.setValueCount(15);
+
+ /* check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+ long bitAddress = vector.getValidityBufferAddress();
+ long offsetAddress = vector.getOffsetBufferAddress();
+ long dataAddress = vector.getDataBufferAddress();
+
+ assertEquals(3, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+ assertEquals(dataAddress, buffers.get(2).memoryAddress());
+ }
+ }
+
+ @Test
+ public void testSetNullableLargeVarCharHolder() {
+ try (LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ vector.allocateNew(100, 10);
+
+ NullableLargeVarCharHolder nullHolder = new NullableLargeVarCharHolder();
+ nullHolder.isSet = 0;
+
+ NullableLargeVarCharHolder stringHolder = new NullableLargeVarCharHolder();
+ stringHolder.isSet = 1;
+
+ String str = "hello";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ stringHolder.start = 0;
+ stringHolder.end = str.length();
+ stringHolder.buffer = buf;
+
+ vector.set(0, nullHolder);
+ vector.set(1, stringHolder);
+
+ // verify results
+ assertTrue(vector.isNull(0));
+ assertEquals(str, new String(vector.get(1)));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableLargeVarCharHolderSafe() {
+ try (LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ NullableLargeVarCharHolder nullHolder = new NullableLargeVarCharHolder();
+ nullHolder.isSet = 0;
+
+ NullableLargeVarCharHolder stringHolder = new NullableLargeVarCharHolder();
+ stringHolder.isSet = 1;
+
+ String str = "hello world";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ stringHolder.start = 0;
+ stringHolder.end = str.length();
+ stringHolder.buffer = buf;
+
+ vector.setSafe(0, stringHolder);
+ vector.setSafe(1, nullHolder);
+
+ // verify results
+ assertEquals(str, new String(vector.get(0)));
+ assertTrue(vector.isNull(1));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testGetNullFromLargeVariableWidthVector() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("largevarcharvec", allocator);
+ final LargeVarBinaryVector largeVarBinaryVector = new LargeVarBinaryVector("largevarbinary", allocator)) {
+ largeVarCharVector.allocateNew(10, 1);
+ largeVarBinaryVector.allocateNew(10, 1);
+
+ largeVarCharVector.setNull(0);
+ largeVarBinaryVector.setNull(0);
+
+ assertNull(largeVarCharVector.get(0));
+ assertNull(largeVarBinaryVector.get(0));
+ }
+ }
+
+ @Test
+ public void testLargeVariableWidthVectorNullHashCode() {
+ try (LargeVarCharVector largeVarChVec = new LargeVarCharVector("large var char vector", allocator)) {
+ largeVarChVec.allocateNew(100, 1);
+ largeVarChVec.setValueCount(1);
+
+ largeVarChVec.set(0, "abc".getBytes());
+ largeVarChVec.setNull(0);
+
+ assertEquals(0, largeVarChVec.hashCode(0));
+ }
+ }
+
+ @Test
+ public void testUnloadLargeVariableWidthVector() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("var char", allocator)) {
+ largeVarCharVector.allocateNew(5, 2);
+ largeVarCharVector.setValueCount(2);
+
+ largeVarCharVector.set(0, "abcd".getBytes());
+
+ List<ArrowBuf> bufs = largeVarCharVector.getFieldBuffers();
+ assertEquals(3, bufs.size());
+
+ ArrowBuf offsetBuf = bufs.get(1);
+ ArrowBuf dataBuf = bufs.get(2);
+
+ assertEquals(24, offsetBuf.writerIndex());
+ assertEquals(4, offsetBuf.getLong(8));
+ assertEquals(4, offsetBuf.getLong(16));
+
+ assertEquals(4, dataBuf.writerIndex());
+ }
+ }
+
+ @Test
+ public void testNullableType() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertTrue(vector.getValueCapacity() >= 512);
+ int initialCapacity = vector.getValueCapacity();
+
+ try {
+ vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
+ assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8));
+ }
+ }
+
+ private void populateLargeVarcharVector(final LargeVarCharVector vector, int valueCount, String[] values) {
+ for (int i = 0; i < valueCount; i += 3) {
+ final String s = String.format("%010d", i);
+ vector.set(i, s.getBytes());
+ if (values != null) {
+ values[i] = s;
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ public static void setBytes(int index, byte[] bytes, LargeVarCharVector vector) {
+ final long currentOffset = vector.offsetBuffer.getLong((long) index * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+
+ BitVectorHelper.setBit(vector.validityBuffer, index);
+ vector.offsetBuffer.setLong(
+ (long) (index + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH, currentOffset + bytes.length);
+ vector.valueBuffer.setBytes(currentOffset, bytes, 0, bytes.length);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
new file mode 100644
index 000000000..ffeedf04d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
@@ -0,0 +1,981 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestListVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testCopyFrom() throws Exception {
+ try (ListVector inVector = ListVector.empty("input", allocator);
+ ListVector outVector = ListVector.empty("output", allocator)) {
+ UnionListWriter writer = inVector.getWriter();
+ writer.allocate();
+
+ // populate input vector with the following records
+ // [1, 2, 3]
+ // null
+ // []
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.bigInt().writeBigInt(1);
+ writer.bigInt().writeBigInt(2);
+ writer.bigInt().writeBigInt(3);
+ writer.endList();
+
+ writer.setPosition(2);
+ writer.startList();
+ writer.endList();
+
+ writer.setValueCount(3);
+
+ // copy values from input to output
+ outVector.allocateNew();
+ for (int i = 0; i < 3; i++) {
+ outVector.copyFrom(i, i, inVector);
+ }
+ outVector.setValueCount(3);
+
+ // assert the output vector is correct
+ FieldReader reader = outVector.getReader();
+ Assert.assertTrue("shouldn't be null", reader.isSet());
+ reader.setPosition(1);
+ Assert.assertFalse("should be null", reader.isSet());
+ reader.setPosition(2);
+ Assert.assertTrue("shouldn't be null", reader.isSet());
+
+
+ /* index 0 */
+ Object result = outVector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(3, resultSet.size());
+ assertEquals(new Long(1), (Long) resultSet.get(0));
+ assertEquals(new Long(2), (Long) resultSet.get(1));
+ assertEquals(new Long(3), (Long) resultSet.get(2));
+
+ /* index 1 */
+ result = outVector.getObject(1);
+ assertNull(result);
+
+ /* index 2 */
+ result = outVector.getObject(2);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(0, resultSet.size());
+
+ /* 3+0+0/3 */
+ assertEquals(1.0D, inVector.getDensity(), 0);
+ }
+ }
+
+ @Test
+ public void testSetLastSetUsage() throws Exception {
+ try (ListVector listVector = ListVector.empty("input", allocator)) {
+
+ /* Explicitly add the dataVector */
+ MinorType type = MinorType.BIGINT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ /* allocate memory */
+ listVector.allocateNew();
+
+ /* get inner buffers; validityBuffer and offsetBuffer */
+
+ ArrowBuf validityBuffer = listVector.getValidityBuffer();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* get the underlying data vector -- BigIntVector */
+ BigIntVector dataVector = (BigIntVector) listVector.getDataVector();
+
+ /* check current lastSet */
+ assertEquals(Integer.toString(-1), Integer.toString(listVector.getLastSet()));
+
+ int index = 0;
+ int offset = 0;
+
+ /* write [10, 11, 12] to the list vector at index 0 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(0, 1, 10);
+ dataVector.setSafe(1, 1, 11);
+ dataVector.setSafe(2, 1, 12);
+ offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 3);
+
+ index += 1;
+
+ /* write [13, 14] to the list vector at index 1 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(3, 1, 13);
+ dataVector.setSafe(4, 1, 14);
+ offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 5);
+
+ index += 1;
+
+ /* write [15, 16, 17] to the list vector at index 2 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(5, 1, 15);
+ dataVector.setSafe(6, 1, 16);
+ dataVector.setSafe(7, 1, 17);
+ offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 8);
+
+ /* check current lastSet */
+ assertEquals(Integer.toString(-1), Integer.toString(listVector.getLastSet()));
+
+ /* set lastset and arbitrary valuecount for list vector.
+ *
+ * NOTE: if we don't execute setLastSet() before setLastValueCount(), then
+ * the latter will corrupt the offsetBuffer and thus the accessor will not
+ * retrieve the correct values from underlying dataBuffer. Run the test
+ * by commenting out next line and we should see failures from 5th assert
+ * onwards. This is why doing setLastSet() is important before setValueCount()
+ * once the vector has been loaded.
+ *
+ * Another important thing to remember is the value of lastSet itself.
+ * Even though the listVector has elements till index 2 only, the lastSet should
+ * be set as 3. This is because the offsetBuffer has valid offsets filled till index 3.
+ * If we do setLastSet(2), the offsetBuffer at index 3 will contain incorrect value
+ * after execution of setValueCount().
+ *
+ * correct state of the listVector
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 3, 5, 8, 8, 8.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ *
+ * if we don't do setLastSet() before setValueCount --> incorrect state
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 0, 0, 0, 0, 0.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ *
+ * if we do setLastSet(2) before setValueCount --> incorrect state
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 3, 5, 5, 5, 5.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ */
+ listVector.setLastSet(2);
+ listVector.setValueCount(10);
+
+ /* (3+2+3)/10 */
+ assertEquals(0.8D, listVector.getDensity(), 0);
+
+ index = 0;
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ Long actual = dataVector.getObject(offset);
+ assertEquals(new Long(10), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(11), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(12), actual);
+
+ index++;
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(13), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(14), actual);
+
+ index++;
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(15), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(16), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(17), actual);
+
+ index++;
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(8), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertNull(actual);
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+
+ /* Explicitly add the dataVector */
+ MinorType type = MinorType.BIGINT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* populate data */
+ listWriter.setPosition(0);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(10);
+ listWriter.bigInt().writeBigInt(11);
+ listWriter.bigInt().writeBigInt(12);
+ listWriter.endList();
+
+ listWriter.setPosition(1);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(13);
+ listWriter.bigInt().writeBigInt(14);
+ listWriter.endList();
+
+ listWriter.setPosition(2);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(15);
+ listWriter.bigInt().writeBigInt(16);
+ listWriter.bigInt().writeBigInt(17);
+ listWriter.bigInt().writeBigInt(18);
+ listWriter.endList();
+
+ listWriter.setPosition(3);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(19);
+ listWriter.endList();
+
+ listWriter.setPosition(4);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(20);
+ listWriter.bigInt().writeBigInt(21);
+ listWriter.bigInt().writeBigInt(22);
+ listWriter.bigInt().writeBigInt(23);
+ listWriter.endList();
+
+ listVector.setValueCount(5);
+
+ assertEquals(4, listVector.getLastSet());
+
+ /* get offset buffer */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* get dataVector */
+ BigIntVector dataVector = (BigIntVector) listVector.getDataVector();
+
+ /* check the vector output */
+
+ int index = 0;
+ int offset = 0;
+ Long actual = null;
+
+ /* index 0 */
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(10), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(11), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(12), actual);
+
+ /* index 1 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(13), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(14), actual);
+
+ /* index 2 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(15), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(16), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(17), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(18), actual);
+
+ /* index 3 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(9), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(19), actual);
+
+ /* index 4 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(10), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(20), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(21), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(22), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(23), actual);
+
+ /* index 5 */
+ index++;
+ assertTrue(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(14), Integer.toString(offset));
+
+ /* do split and transfer */
+ try (ListVector toVector = ListVector.empty("toVector", allocator)) {
+
+ TransferPair transferPair = listVector.makeTransferPair(toVector);
+
+ int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}};
+
+ for (final int[] transferLength : transferLengths) {
+ int start = transferLength[0];
+ int splitLength = transferLength[1];
+
+ int dataLength1 = 0;
+ int dataLength2 = 0;
+
+ int offset1 = 0;
+ int offset2 = 0;
+
+ transferPair.splitAndTransfer(start, splitLength);
+
+ /* get offsetBuffer of toVector */
+ final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer();
+
+ /* get dataVector of toVector */
+ BigIntVector dataVector1 = (BigIntVector) toVector.getDataVector();
+
+ for (int i = 0; i < splitLength; i++) {
+ dataLength1 = offsetBuffer.getInt((start + i + 1) * ListVector.OFFSET_WIDTH) -
+ offsetBuffer.getInt((start + i) * ListVector.OFFSET_WIDTH);
+ dataLength2 = toOffsetBuffer.getInt((i + 1) * ListVector.OFFSET_WIDTH) -
+ toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH);
+
+ assertEquals("Different data lengths at index: " + i + " and start: " + start,
+ dataLength1, dataLength2);
+
+ offset1 = offsetBuffer.getInt((start + i) * ListVector.OFFSET_WIDTH);
+ offset2 = toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH);
+
+ for (int j = 0; j < dataLength1; j++) {
+ assertEquals("Different data at indexes: " + offset1 + " and " + offset2,
+ dataVector.getObject(offset1), dataVector1.getObject(offset2));
+
+ offset1++;
+ offset2++;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testNestedListVector() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* the dataVector that backs a listVector will also be a
+ * listVector for this test.
+ */
+
+ /* write one or more inner lists at index 0 */
+ listWriter.setPosition(0);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(75);
+ listWriter.list().bigInt().writeBigInt(125);
+ listWriter.list().bigInt().writeBigInt(150);
+ listWriter.list().bigInt().writeBigInt(175);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ /* write one or more inner lists at index 1 */
+ listWriter.setPosition(1);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(10);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(15);
+ listWriter.list().bigInt().writeBigInt(20);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(25);
+ listWriter.list().bigInt().writeBigInt(30);
+ listWriter.list().bigInt().writeBigInt(35);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ assertEquals(1, listVector.getLastSet());
+
+ listVector.setValueCount(2);
+
+ assertEquals(2, listVector.getValueCount());
+
+ /* get listVector value at index 0 -- the value itself is a listvector */
+ Object result = listVector.getObject(0);
+ ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result;
+ ArrayList<Long> list;
+
+ assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */
+ assertEquals(3, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(4, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+ assertEquals(new Long(150), list.get(2));
+ assertEquals(new Long(175), list.get(3));
+
+ /* get listVector value at index 1 -- the value itself is a listvector */
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<ArrayList<Long>>) result;
+
+ assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */
+ assertEquals(1, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(2, resultSet.get(1).size()); /* size of second inner list */
+ assertEquals(3, resultSet.get(2).size()); /* size of third inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(10), list.get(0));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ list = resultSet.get(2);
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(listVector.isNull(0));
+ assertFalse(listVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* listVector has 2 lists at index 0 and 3 lists at index 1 */
+ assertEquals(0, offsetBuffer.getInt(0 * ListVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(1 * ListVector.OFFSET_WIDTH));
+ assertEquals(5, offsetBuffer.getInt(2 * ListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testNestedListVector1() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+
+ MinorType listType = MinorType.LIST;
+ MinorType scalarType = MinorType.BIGINT;
+
+ listVector.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList1 = (ListVector) listVector.getDataVector();
+ innerList1.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList2 = (ListVector) innerList1.getDataVector();
+ innerList2.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList3 = (ListVector) innerList2.getDataVector();
+ innerList3.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList4 = (ListVector) innerList3.getDataVector();
+ innerList4.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList5 = (ListVector) innerList4.getDataVector();
+ innerList5.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList6 = (ListVector) innerList5.getDataVector();
+ innerList6.addOrGetVector(FieldType.nullable(scalarType.getType()));
+
+ listVector.setInitialCapacity(128);
+ }
+ }
+
+ @Test
+ public void testNestedListVector2() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+ listVector.setInitialCapacity(1);
+ UnionListWriter listWriter = listVector.getWriter();
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* write one or more inner lists at index 0 */
+ listWriter.setPosition(0);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(75);
+ listWriter.list().bigInt().writeBigInt(125);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ /* write one or more inner lists at index 1 */
+ listWriter.setPosition(1);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(15);
+ listWriter.list().bigInt().writeBigInt(20);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(25);
+ listWriter.list().bigInt().writeBigInt(30);
+ listWriter.list().bigInt().writeBigInt(35);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ assertEquals(1, listVector.getLastSet());
+
+ listVector.setValueCount(2);
+
+ assertEquals(2, listVector.getValueCount());
+
+ /* get listVector value at index 0 -- the value itself is a listvector */
+ Object result = listVector.getObject(0);
+ ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result;
+ ArrayList<Long> list;
+
+ assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */
+ assertEquals(3, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(2, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+
+ /* get listVector value at index 1 -- the value itself is a listvector */
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<ArrayList<Long>>) result;
+
+ assertEquals(2, resultSet.size()); /* 3 inner lists at index 1 */
+ assertEquals(2, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(3, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(listVector.isNull(0));
+ assertFalse(listVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* listVector has 2 lists at index 0 and 3 lists at index 1 */
+ assertEquals(0, offsetBuffer.getInt(0 * ListVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(1 * ListVector.OFFSET_WIDTH));
+ assertEquals(4, offsetBuffer.getInt(2 * ListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testGetBufferAddress() throws Exception {
+ try (ListVector listVector = ListVector.empty("vector", allocator)) {
+
+ UnionListWriter listWriter = listVector.getWriter();
+ boolean error = false;
+
+ listWriter.allocate();
+
+ listWriter.setPosition(0);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(50);
+ listWriter.bigInt().writeBigInt(100);
+ listWriter.bigInt().writeBigInt(200);
+ listWriter.endList();
+
+ listWriter.setPosition(1);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(250);
+ listWriter.bigInt().writeBigInt(300);
+ listWriter.endList();
+
+ listVector.setValueCount(2);
+
+ /* check listVector contents */
+ Object result = listVector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(3, resultSet.size());
+ assertEquals(new Long(50), resultSet.get(0));
+ assertEquals(new Long(100), resultSet.get(1));
+ assertEquals(new Long(200), resultSet.get(2));
+
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(2, resultSet.size());
+ assertEquals(new Long(250), resultSet.get(0));
+ assertEquals(new Long(300), resultSet.get(1));
+
+ List<ArrowBuf> buffers = listVector.getFieldBuffers();
+
+ long bitAddress = listVector.getValidityBufferAddress();
+ long offsetAddress = listVector.getOffsetBufferAddress();
+
+ try {
+ long dataAddress = listVector.getDataBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(2, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+
+ /* (3+2)/2 */
+ assertEquals(2.5, listVector.getDensity(), 0);
+ }
+ }
+
+ @Test
+ public void testConsistentChildName() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+ String emptyListStr = listVector.getField().toString();
+ assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME));
+
+ listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+ String emptyVectorStr = listVector.getField().toString();
+ assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME));
+ }
+ }
+
+ @Test
+ public void testSetInitialCapacity() {
+ try (final ListVector vector = ListVector.empty("", allocator)) {
+ vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+ /**
+ * use the default multiplier of 5,
+ * 512 * 5 => 2560 * 4 => 10240 bytes => 16KB => 4096 value capacity.
+ */
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 5);
+
+ /* use density as 4 */
+ vector.setInitialCapacity(512, 4);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4);
+
+ /**
+ * inner value capacity we pass to data vector is 512 * 0.1 => 51
+ * For an int vector this is 204 bytes of memory for data buffer
+ * and 7 bytes for validity buffer.
+ * and with power of 2 allocation, we allocate 256 bytes and 8 bytes
+ * for the data buffer and validity buffer of the inner vector. Thus
+ * value capacity of inner vector is 64
+ */
+ vector.setInitialCapacity(512, 0.1);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 51);
+
+ /**
+ * inner value capacity we pass to data vector is 512 * 0.01 => 5
+ * For an int vector this is 20 bytes of memory for data buffer
+ * and 1 byte for validity buffer.
+ * and with power of 2 allocation, we allocate 32 bytes and 1 bytes
+ * for the data buffer and validity buffer of the inner vector. Thus
+ * value capacity of inner vector is 8
+ */
+ vector.setInitialCapacity(512, 0.01);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 5);
+
+ /**
+ * inner value capacity we pass to data vector is 5 * 0.1 => 0
+ * which is then rounded off to 1. So we pass value count as 1
+ * to the inner int vector.
+ * the offset buffer of the list vector is allocated for 6 values
+ * which is 24 bytes and then rounded off to 32 bytes (8 values)
+ * the validity buffer of the list vector is allocated for 5
+ * values which is 1 byte. This is why value capacity of the list
+ * vector is 7 as we take the min of validity buffer value capacity
+ * and offset buffer value capacity.
+ */
+ vector.setInitialCapacity(5, 0.1);
+ vector.allocateNew();
+ assertEquals(7, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 1);
+ }
+ }
+
+ @Test
+ public void testClearAndReuse() {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+ BigIntVector bigIntVector =
+ (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector();
+ vector.setInitialCapacity(10);
+ vector.allocateNew();
+
+ vector.startNewValue(0);
+ bigIntVector.setSafe(0, 7);
+ vector.endValue(0, 1);
+ vector.startNewValue(1);
+ bigIntVector.setSafe(1, 8);
+ vector.endValue(1, 1);
+ vector.setValueCount(2);
+
+ Object result = vector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(7), resultSet.get(0));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(8), resultSet.get(0));
+
+ // Clear and release the buffers to trigger a realloc when adding next value
+ vector.clear();
+
+ // The list vector should reuse a buffer when reallocating the offset buffer
+ vector.startNewValue(0);
+ bigIntVector.setSafe(0, 7);
+ vector.endValue(0, 1);
+ vector.startNewValue(1);
+ bigIntVector.setSafe(1, 8);
+ vector.endValue(1, 1);
+ vector.setValueCount(2);
+
+ result = vector.getObject(0);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(7), resultSet.get(0));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(8), resultSet.get(0));
+ }
+ }
+
+ @Test
+ public void testWriterGetField() {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writer.startList();
+ writer.integer().writeInt(1);
+ writer.integer().writeInt(2);
+ writer.endList();
+ vector.setValueCount(2);
+
+ Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME,
+ FieldType.nullable(new ArrowType.Int(32, true)), null);
+ Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.List.INSTANCE),
+ Arrays.asList(expectedDataField));
+
+ assertEquals(expectedField, writer.getField());
+ }
+ }
+
+ @Test
+ public void testClose() throws Exception {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writer.startList();
+ writer.integer().writeInt(1);
+ writer.integer().writeInt(2);
+ writer.endList();
+ vector.setValueCount(2);
+
+ assertTrue(vector.getBufferSize() > 0);
+ assertTrue(vector.getDataVector().getBufferSize() > 0);
+
+ writer.close();
+ assertEquals(0, vector.getBufferSize());
+ assertEquals(0, vector.getDataVector().getBufferSize());
+ }
+ }
+
+ @Test
+ public void testGetBufferSizeFor() {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writeIntValues(writer, new int[] {1, 2});
+ writeIntValues(writer, new int[] {3, 4});
+ writeIntValues(writer, new int[] {5, 6});
+ writeIntValues(writer, new int[] {7, 8, 9, 10});
+ writeIntValues(writer, new int[] {11, 12, 13, 14});
+ writer.setValueCount(5);
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ int[] indices = new int[] {0, 2, 4, 6, 10, 14};
+
+ for (int valueCount = 1; valueCount <= 5; valueCount++) {
+ int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount);
+ int offsetBufferSize = (valueCount + 1) * BaseRepeatedValueVector.OFFSET_WIDTH;
+
+ int expectedSize = validityBufferSize + offsetBufferSize + dataVector.getBufferSizeFor(indices[valueCount]);
+ assertEquals(expectedSize, vector.getBufferSizeFor(valueCount));
+ }
+ }
+ }
+
+ @Test
+ public void testIsEmpty() {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ // set values [1,2], null, [], [5,6]
+ writeIntValues(writer, new int[] {1, 2});
+ writer.setPosition(2);
+ writeIntValues(writer, new int[] {});
+ writeIntValues(writer, new int[] {5, 6});
+ writer.setValueCount(4);
+
+ assertFalse(vector.isEmpty(0));
+ assertTrue(vector.isNull(1));
+ assertTrue(vector.isEmpty(1));
+ assertFalse(vector.isNull(2));
+ assertTrue(vector.isEmpty(2));
+ assertFalse(vector.isEmpty(3));
+ }
+ }
+
+ private void writeIntValues(UnionListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
new file mode 100644
index 000000000..9637021db
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
@@ -0,0 +1,1113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionMapWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestMapVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ public <T> T getResultKey(Map<?, T> resultStruct) {
+ assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
+ return resultStruct.get(MapVector.KEY_NAME);
+ }
+
+ public <T> T getResultValue(Map<?, T> resultStruct) {
+ assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
+ return resultStruct.get(MapVector.VALUE_NAME);
+ }
+
+ @Test
+ public void testBasicOperation() {
+ int count = 5;
+ try (MapVector mapVector = MapVector.empty("map", allocator, false)) {
+ mapVector.allocateNew();
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ for (int i = 0; i < count; i++) {
+ mapWriter.startMap();
+ for (int j = 0; j < i + 1; j++) {
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(j);
+ mapWriter.value().integer().writeInt(j);
+ mapWriter.endEntry();
+ }
+ mapWriter.endMap();
+ }
+ mapWriter.setValueCount(count);
+ UnionMapReader mapReader = mapVector.getReader();
+ for (int i = 0; i < count; i++) {
+ mapReader.setPosition(i);
+ for (int j = 0; j < i + 1; j++) {
+ mapReader.next();
+ assertEquals("record: " + i, j, mapReader.key().readLong().longValue());
+ assertEquals(j, mapReader.value().readInteger().intValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testBasicOperationNulls() {
+ int count = 6;
+ try (MapVector mapVector = MapVector.empty("map", allocator, false)) {
+ mapVector.allocateNew();
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ for (int i = 0; i < count; i++) {
+ // i == 1 is a NULL
+ if (i != 1) {
+ mapWriter.setPosition(i);
+ mapWriter.startMap();
+ // i == 3 is an empty map
+ if (i != 3) {
+ for (int j = 0; j < i + 1; j++) {
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(j);
+ // i == 5 maps to a NULL value
+ if (i != 5) {
+ mapWriter.value().integer().writeInt(j);
+ }
+ mapWriter.endEntry();
+ }
+ }
+ mapWriter.endMap();
+ }
+ }
+ mapWriter.setValueCount(count);
+ UnionMapReader mapReader = mapVector.getReader();
+ for (int i = 0; i < count; i++) {
+ mapReader.setPosition(i);
+ if (i == 1) {
+ assertFalse(mapReader.isSet());
+ } else {
+ if (i == 3) {
+ JsonStringArrayList<?> result = (JsonStringArrayList<?>) mapReader.readObject();
+ assertTrue(result.isEmpty());
+ } else {
+ for (int j = 0; j < i + 1; j++) {
+ mapReader.next();
+ assertEquals("record: " + i, j, mapReader.key().readLong().longValue());
+ if (i == 5) {
+ assertFalse(mapReader.value().isSet());
+ } else {
+ assertEquals(j, mapReader.value().readInteger().intValue());
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testCopyFrom() throws Exception {
+ try (MapVector inVector = MapVector.empty("input", allocator, false);
+ MapVector outVector = MapVector.empty("output", allocator, false)) {
+ UnionMapWriter writer = inVector.getWriter();
+ writer.allocate();
+
+ // populate input vector with the following records
+ // {1 -> 11, 2 -> 22, 3 -> 33}
+ // null
+ // {2 -> null}
+ writer.setPosition(0); // optional
+ writer.startMap();
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(1);
+ writer.value().bigInt().writeBigInt(11);
+ writer.endEntry();
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(2);
+ writer.value().bigInt().writeBigInt(22);
+ writer.endEntry();
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(3);
+ writer.value().bigInt().writeBigInt(33);
+ writer.endEntry();
+ writer.endMap();
+
+ writer.setPosition(2);
+ writer.startMap();
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(2);
+ writer.endEntry();
+ writer.endMap();
+
+ writer.setValueCount(3);
+
+ // copy values from input to output
+ outVector.allocateNew();
+ for (int i = 0; i < 3; i++) {
+ outVector.copyFrom(i, i, inVector);
+ }
+ outVector.setValueCount(3);
+
+ // assert the output vector is correct
+ FieldReader reader = outVector.getReader();
+ assertTrue("shouldn't be null", reader.isSet());
+ reader.setPosition(1);
+ assertFalse("should be null", reader.isSet());
+ reader.setPosition(2);
+ assertTrue("shouldn't be null", reader.isSet());
+
+
+ /* index 0 */
+ Object result = outVector.getObject(0);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+ assertEquals(3, resultSet.size());
+ Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(1L, getResultKey(resultStruct));
+ assertEquals(11L, getResultValue(resultStruct));
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(2L, getResultKey(resultStruct));
+ assertEquals(22L, getResultValue(resultStruct));
+ resultStruct = (Map<?, ?>) resultSet.get(2);
+ assertEquals(3L, getResultKey(resultStruct));
+ assertEquals(33L, getResultValue(resultStruct));
+
+ /* index 1 */
+ result = outVector.getObject(1);
+ assertNull(result);
+
+ /* index 2 */
+ result = outVector.getObject(2);
+ resultSet = (ArrayList<?>) result;
+ assertEquals(1, resultSet.size());
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(2L, getResultKey(resultStruct));
+ assertFalse(resultStruct.containsKey(MapVector.VALUE_NAME));
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+ /* Explicitly add the map child vectors */
+ FieldType type = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ AddOrGetResult<StructVector> addResult = mapVector.addOrGetVector(type);
+ FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null);
+ FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType());
+ addResult.getVector().addOrGet(MapVector.KEY_NAME, keyType, BigIntVector.class);
+ addResult.getVector().addOrGet(MapVector.VALUE_NAME, valueType, Float8Vector.class);
+
+ UnionMapWriter mapWriter = mapVector.getWriter();
+
+ /* allocate memory */
+ mapWriter.allocate();
+
+ /* populate data */
+ mapWriter.setPosition(0);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(10);
+ mapWriter.value().float8().writeFloat8(1.0);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(11);
+ mapWriter.value().float8().writeFloat8(1.1);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(12);
+ mapWriter.value().float8().writeFloat8(1.2);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setPosition(1);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(13);
+ mapWriter.value().float8().writeFloat8(1.3);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(14);
+ mapWriter.value().float8().writeFloat8(1.4);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setPosition(2);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(15);
+ mapWriter.value().float8().writeFloat8(1.5);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(16);
+ mapWriter.value().float8().writeFloat8(1.6);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(17);
+ mapWriter.value().float8().writeFloat8(1.7);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(18);
+ mapWriter.value().float8().writeFloat8(1.8);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setPosition(3);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(19);
+ mapWriter.value().float8().writeFloat8(1.9);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setPosition(4);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(20);
+ mapWriter.value().float8().writeFloat8(2.0);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(21);
+ mapWriter.value().float8().writeFloat8(2.1);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(22);
+ mapWriter.value().float8().writeFloat8(2.2);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(23);
+ mapWriter.value().float8().writeFloat8(2.3);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapVector.setValueCount(5);
+
+ assertEquals(4, mapVector.getLastSet());
+
+ /* get offset buffer */
+ final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+ /* get dataVector */
+ StructVector dataVector = (StructVector) mapVector.getDataVector();
+
+ /* check the vector output */
+ int index = 0;
+ int offset = 0;
+ Map<?, ?> result = null;
+
+ /* index 0 */
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(10L, getResultKey(result));
+ assertEquals(1.0, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(11L, getResultKey(result));
+ assertEquals(1.1, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(12L, getResultKey(result));
+ assertEquals(1.2, getResultValue(result));
+
+ /* index 1 */
+ index++;
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(13L, getResultKey(result));
+ assertEquals(1.3, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(14L, getResultKey(result));
+ assertEquals(1.4, getResultValue(result));
+
+ /* index 2 */
+ index++;
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(15L, getResultKey(result));
+ assertEquals(1.5, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(16L, getResultKey(result));
+ assertEquals(1.6, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(17L, getResultKey(result));
+ assertEquals(1.7, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(18L, getResultKey(result));
+ assertEquals(1.8, getResultValue(result));
+
+ /* index 3 */
+ index++;
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(9), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(19L, getResultKey(result));
+ assertEquals(1.9, getResultValue(result));
+
+ /* index 4 */
+ index++;
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(10), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(20L, getResultKey(result));
+ assertEquals(2.0, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(21L, getResultKey(result));
+ assertEquals(2.1, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(22L, getResultKey(result));
+ assertEquals(2.2, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(23L, getResultKey(result));
+ assertEquals(2.3, getResultValue(result));
+
+ /* index 5 */
+ index++;
+ assertTrue(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(14), Integer.toString(offset));
+
+ /* do split and transfer */
+ try (MapVector toVector = MapVector.empty("toVector", allocator, false)) {
+
+ TransferPair transferPair = mapVector.makeTransferPair(toVector);
+
+ int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}};
+
+ for (final int[] transferLength : transferLengths) {
+ int start = transferLength[0];
+ int splitLength = transferLength[1];
+
+ int dataLength1 = 0;
+ int dataLength2 = 0;
+
+ int offset1 = 0;
+ int offset2 = 0;
+
+ transferPair.splitAndTransfer(start, splitLength);
+
+ /* get offsetBuffer of toVector */
+ final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer();
+
+ /* get dataVector of toVector */
+ StructVector dataVector1 = (StructVector) toVector.getDataVector();
+
+ for (int i = 0; i < splitLength; i++) {
+ dataLength1 = offsetBuffer.getInt((start + i + 1) * MapVector.OFFSET_WIDTH) -
+ offsetBuffer.getInt((start + i) * MapVector.OFFSET_WIDTH);
+ dataLength2 = toOffsetBuffer.getInt((i + 1) * MapVector.OFFSET_WIDTH) -
+ toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH);
+
+ assertEquals("Different data lengths at index: " + i + " and start: " + start,
+ dataLength1, dataLength2);
+
+ offset1 = offsetBuffer.getInt((start + i) * MapVector.OFFSET_WIDTH);
+ offset2 = toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH);
+
+ for (int j = 0; j < dataLength1; j++) {
+ assertEquals("Different data at indexes: " + offset1 + " and " + offset2,
+ dataVector.getObject(offset1), dataVector1.getObject(offset2));
+
+ offset1++;
+ offset2++;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testMapWithListValue() throws Exception {
+ try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ ListWriter valueWriter;
+
+ /* allocate memory */
+ mapWriter.allocate();
+
+ /* the dataVector that backs a listVector will also be a
+ * listVector for this test.
+ */
+
+ /* write one or more maps index 0 */
+ mapWriter.setPosition(0);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(1);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(50);
+ valueWriter.bigInt().writeBigInt(100);
+ valueWriter.bigInt().writeBigInt(200);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(2);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(75);
+ valueWriter.bigInt().writeBigInt(125);
+ valueWriter.bigInt().writeBigInt(150);
+ valueWriter.bigInt().writeBigInt(175);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ /* write one or more maps at index 1 */
+ mapWriter.setPosition(1);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(3);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(10);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(4);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(15);
+ valueWriter.bigInt().writeBigInt(20);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(5);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(25);
+ valueWriter.bigInt().writeBigInt(30);
+ valueWriter.bigInt().writeBigInt(35);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ assertEquals(1, mapVector.getLastSet());
+
+ mapWriter.setValueCount(2);
+
+ assertEquals(2, mapVector.getValueCount());
+
+ // Get mapVector element at index 0
+ Object result = mapVector.getObject(0);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+
+ // 2 map entries at index 0
+ assertEquals(2, resultSet.size());
+
+ // First Map entry
+ Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(1L, getResultKey(resultStruct));
+ ArrayList<Long> list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(3, list.size()); // value is a list with 3 elements
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ // Second Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(4, list.size()); // value is a list with 4 elements
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+ assertEquals(new Long(150), list.get(2));
+ assertEquals(new Long(175), list.get(3));
+
+ // Get mapVector element at index 1
+ result = mapVector.getObject(1);
+ resultSet = (ArrayList<?>) result;
+
+ // First Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(3L, getResultKey(resultStruct));
+ list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list with 1 element
+ assertEquals(new Long(10), list.get(0));
+
+ // Second Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(4L, getResultKey(resultStruct));
+ list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list with 1 element
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ // Third Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(2);
+ assertEquals(5L, getResultKey(resultStruct));
+ list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(3, list.size()); // value is a list with 1 element
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(mapVector.isNull(0));
+ assertFalse(mapVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+ /* mapVector has 2 entries at index 0 and 3 entries at index 1 */
+ assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH));
+ assertEquals(5, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testMapWithMapValue() throws Exception {
+ try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ MapWriter valueWriter;
+
+ // we are essentially writing Map<Long, Map<Long, Long>>
+ // populate map vector with the following three records
+ // [
+ // null,
+ // [1:[50: 100, 200:400], 2:[75: 175, 150: 250]],
+ // [3:[10: 20], 4:[15: 20], 5:[25: 30, 35: null]]
+ // ]
+
+ /* write null at index 0 */
+ mapWriter.setPosition(0);
+ mapWriter.writeNull();
+
+ /* write one or more maps at index 1 */
+ mapWriter.setPosition(1);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(1);
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 50, 100L);
+ writeEntry(valueWriter, 200, 400L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(2);
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 75, 175L);
+ writeEntry(valueWriter, 150, 250L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ /* write one or more maps at index 2 */
+ mapWriter.setPosition(2);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(3);
+ valueWriter = mapWriter.value().map(true);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 10, 20L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(4);
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 15, 20L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(5);
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 25, 30L);
+ writeEntry(valueWriter, 35, (Long) null);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ assertEquals(2, mapVector.getLastSet());
+
+ mapWriter.setValueCount(3);
+
+ assertEquals(3, mapVector.getValueCount());
+
+ // Get mapVector element at index 0
+ Object result = mapVector.getObject(0);
+ assertNull(result);
+
+ // Get mapVector element at index 1
+ result = mapVector.getObject(1);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+
+ // 2 map entries at index 0
+ assertEquals(2, resultSet.size());
+
+ // First Map entry
+ Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(1L, getResultKey(resultStruct));
+ ArrayList<Map<?, ?>> list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of 2 two maps
+ Map<?, ?> innerMap = list.get(0);
+ assertEquals(50L, getResultKey(innerMap));
+ assertEquals(100L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(200L, getResultKey(innerMap));
+ assertEquals(400L, getResultValue(innerMap));
+
+ // Second Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(2L, getResultKey(resultStruct));
+ list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of two maps
+ innerMap = list.get(0);
+ assertEquals(75L, getResultKey(innerMap));
+ assertEquals(175L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(150L, getResultKey(innerMap));
+ assertEquals(250L, getResultValue(innerMap));
+
+ // Get mapVector element at index 2
+ result = mapVector.getObject(2);
+ resultSet = (ArrayList<?>) result;
+
+ // 3 map entries at index 1
+ assertEquals(3, resultSet.size());
+
+ // First Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(3L, getResultKey(resultStruct));
+ list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list of maps with 1 element
+ innerMap = list.get(0);
+ assertEquals(10L, getResultKey(innerMap));
+ assertEquals(20L, getResultValue(innerMap));
+
+ // Second Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(4L, getResultKey(resultStruct));
+ list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list of maps with 1 element
+ innerMap = list.get(0);
+ assertEquals(15L, getResultKey(innerMap));
+ assertEquals(20L, getResultValue(innerMap));
+
+ // Third Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(2);
+ assertEquals(5L, getResultKey(resultStruct));
+ list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of maps with 2 elements
+ innerMap = list.get(0);
+ assertEquals(25L, getResultKey(innerMap));
+ assertEquals(30L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(35L, getResultKey(innerMap));
+ assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+ /* check underlying bitVector */
+ assertTrue(mapVector.isNull(0));
+ assertFalse(mapVector.isNull(1));
+ assertFalse(mapVector.isNull(2));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+ /* mapVector has 0 entries at index 0, 2 entries at index 1, and 3 entries at index 2 */
+ assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH));
+ assertEquals(0, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH));
+ assertEquals(5, offsetBuffer.getInt(3 * MapVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testMapWithMapKeyAndMapValue() throws Exception {
+ try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ MapWriter keyWriter;
+ MapWriter valueWriter;
+
+ // we are essentially writing Map<Map<Integer, Integer>, Map<Long, Long>>
+ // populate map vector with the following two records
+ // [
+ // [[5: 10, 20: 40]:[50: 100, 200: 400], [50: 100]:[75: 175, 150: 250]],
+ // [[1: 2]:[10: 20], [30: 40]:[15: 20], [50: 60, 70: null]:[25: 30, 35: null], [5: null]: null]
+ // ]
+
+ mapWriter.setPosition(0);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 5, 10);
+ writeEntry(keyWriter, 20, 40);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 50, 100L);
+ writeEntry(valueWriter, 200, 400L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 50, 100);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 75, 175L);
+ writeEntry(valueWriter, 150, 250L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ /* write one or more maps at index 1 */
+ mapWriter.setPosition(1);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 1, 2);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(true);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 10, 20L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 30, 40);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 15, 20L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 50, 60);
+ writeEntry(keyWriter, 70, (Integer) null);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 25, 30L);
+ writeEntry(valueWriter, 35, (Long) null);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 5, (Integer) null);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.writeNull();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ assertEquals(1, mapVector.getLastSet());
+
+ mapWriter.setValueCount(2);
+
+ assertEquals(2, mapVector.getValueCount());
+
+ // Get mapVector element at index 0
+ Object result = mapVector.getObject(0);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+
+ // 2 map entries at index 0
+ assertEquals(2, resultSet.size());
+
+ // First Map entry
+ Map<?, ArrayList<Map<?, ?>>> resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(0);
+ ArrayList<Map<?, ?>> list = getResultKey(resultStruct);
+ assertEquals(2, list.size()); // key is a list of 2 two maps
+ Map<?, ?> innerMap = list.get(0);
+ assertEquals(5, getResultKey(innerMap));
+ assertEquals(10, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(20, getResultKey(innerMap));
+ assertEquals(40, getResultValue(innerMap));
+
+ list = getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of 2 two maps
+ innerMap = list.get(0);
+ assertEquals(50L, getResultKey(innerMap));
+ assertEquals(100L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(200L, getResultKey(innerMap));
+ assertEquals(400L, getResultValue(innerMap));
+
+ // Second Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(1);
+ list = getResultKey(resultStruct);
+ assertEquals(1, list.size()); // key is a list of 1 two map
+ innerMap = list.get(0);
+ assertEquals(50, getResultKey(innerMap));
+ assertEquals(100, getResultValue(innerMap));
+
+ list = getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of two maps
+ innerMap = list.get(0);
+ assertEquals(75L, getResultKey(innerMap));
+ assertEquals(175L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(150L, getResultKey(innerMap));
+ assertEquals(250L, getResultValue(innerMap));
+
+ // Get mapVector element at index 1
+ result = mapVector.getObject(1);
+ resultSet = (ArrayList<?>) result;
+
+ // 4 map entries at index 1
+ assertEquals(4, resultSet.size());
+
+ // First Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(0);
+ list = getResultKey(resultStruct);
+ assertEquals(1, list.size()); // key is a list of 1 map
+ innerMap = list.get(0);
+ assertEquals(1, getResultKey(innerMap));
+ assertEquals(2, getResultValue(innerMap));
+
+ list = getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list of maps with 1 element
+ innerMap = list.get(0);
+ assertEquals(10L, getResultKey(innerMap));
+ assertEquals(20L, getResultValue(innerMap));
+
+ // Second Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(1);
+ list = getResultKey(resultStruct);
+ assertEquals(1, list.size()); // key is a list of 1 map
+ innerMap = list.get(0);
+ assertEquals(30, getResultKey(innerMap));
+ assertEquals(40, getResultValue(innerMap));
+
+ list = getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list of maps with 1 element
+ innerMap = list.get(0);
+ assertEquals(15L, getResultKey(innerMap));
+ assertEquals(20L, getResultValue(innerMap));
+
+ // Third Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(2);
+ list = getResultKey(resultStruct);
+ assertEquals(2, list.size()); // key is a list of two maps
+ innerMap = list.get(0);
+ assertEquals(50, getResultKey(innerMap));
+ assertEquals(60, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(70, getResultKey(innerMap));
+ assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+ list = getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of maps with 2 elements
+ innerMap = list.get(0);
+ assertEquals(25L, getResultKey(innerMap));
+ assertEquals(30L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(35L, getResultKey(innerMap));
+ assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+ // Fourth Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(3);
+ list = getResultKey(resultStruct);
+ assertEquals(1, list.size()); // key is a list of two maps
+ innerMap = list.get(0);
+ assertEquals(5, getResultKey(innerMap));
+ assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+ assertNull(resultStruct.get(MapVector.VALUE_NAME));
+
+ /* check underlying bitVector */
+ assertFalse(mapVector.isNull(0));
+ assertFalse(mapVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+ /* mapVector has 2 entries at index 0 and 4 entries at index 1 */
+ assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH));
+ assertEquals(6, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH));
+ }
+ }
+
+ private void writeEntry(MapWriter writer, long key, Long value) {
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(key);
+ if (value != null) {
+ writer.value().bigInt().writeBigInt(value);
+ }
+ writer.endEntry();
+ }
+
+ private void writeEntry(MapWriter writer, int key, Integer value) {
+ writer.startEntry();
+ writer.key().integer().writeInt(key);
+ if (value != null) {
+ writer.value().integer().writeInt(value);
+ }
+ writer.endEntry();
+ }
+
+ @Test
+ public void testClearAndReuse() {
+ try (final MapVector vector = MapVector.empty("map", allocator, false)) {
+ vector.allocateNew();
+ UnionMapWriter mapWriter = vector.getWriter();
+
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(1);
+ mapWriter.value().integer().writeInt(11);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(2);
+ mapWriter.value().integer().writeInt(22);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setValueCount(2);
+
+ Object result = vector.getObject(0);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+ Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(1L, getResultKey(resultStruct));
+ assertEquals(11, getResultValue(resultStruct));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<?>) result;
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(2L, getResultKey(resultStruct));
+ assertEquals(22, getResultValue(resultStruct));
+
+ // Clear and release the buffers to trigger a realloc when adding next value
+ vector.clear();
+ mapWriter = new UnionMapWriter(vector);
+
+ // The map vector should reuse a buffer when reallocating the offset buffer
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(3);
+ mapWriter.value().integer().writeInt(33);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(4);
+ mapWriter.value().integer().writeInt(44);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(5);
+ mapWriter.value().integer().writeInt(55);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setValueCount(2);
+
+ result = vector.getObject(0);
+ resultSet = (ArrayList<?>) result;
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(3L, getResultKey(resultStruct));
+ assertEquals(33, getResultValue(resultStruct));
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(4L, getResultKey(resultStruct));
+ assertEquals(44, getResultValue(resultStruct));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<?>) result;
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(5L, getResultKey(resultStruct));
+ assertEquals(55, getResultValue(resultStruct));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java
new file mode 100644
index 000000000..f1345e88a
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.lang.reflect.Field;
+import java.net.URLClassLoader;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link NullCheckingForGet}.
+ */
+public class TestNullCheckingForGet {
+
+ /**
+ * Get a copy of the current class loader.
+ * @return the newly created class loader.
+ */
+ private ClassLoader copyClassLoader() {
+ ClassLoader curClassLoader = this.getClass().getClassLoader();
+ if (curClassLoader instanceof URLClassLoader) {
+ // for Java 1.8
+ return new URLClassLoader(((URLClassLoader) curClassLoader).getURLs(), null);
+ }
+
+ // for Java 1.9 and Java 11.
+ return null;
+ }
+
+ /**
+ * Get the value of flag {@link NullCheckingForGet#NULL_CHECKING_ENABLED}.
+ * @param classLoader the class loader from which to get the flag value.
+ * @return value of the flag.
+ */
+ private boolean getFlagValue(ClassLoader classLoader) throws Exception {
+ Class<?> clazz = classLoader.loadClass("org.apache.arrow.vector.NullCheckingForGet");
+ Field field = clazz.getField("NULL_CHECKING_ENABLED");
+ return (Boolean) field.get(null);
+ }
+
+ /**
+ * Ensure the flag for null checking is enabled by default.
+ * This will protect users from JVM crashes.
+ */
+ @Test
+ public void testDefaultValue() throws Exception {
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ boolean nullCheckingEnabled = getFlagValue(classLoader);
+ Assert.assertTrue(nullCheckingEnabled);
+ }
+ }
+
+ /**
+ * Test setting the null checking flag by the system property.
+ * @throws Exception if loading class {@link NullCheckingForGet#NULL_CHECKING_ENABLED} fails.
+ */
+ @Test
+ public void testEnableSysProperty() throws Exception {
+ String sysProperty = System.getProperty("arrow.enable_null_check_for_get");
+ System.setProperty("arrow.enable_null_check_for_get", "false");
+
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ boolean nullCheckingEnabled = getFlagValue(classLoader);
+ Assert.assertFalse(nullCheckingEnabled);
+ }
+
+ // restore system property
+ if (sysProperty != null) {
+ System.setProperty("arrow.enable_null_check_for_get", sysProperty);
+ } else {
+ System.clearProperty("arrow.enable_null_check_for_get");
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java
new file mode 100644
index 000000000..7f26b5c1b
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * This class tests cases where we expect to receive {@link OutOfMemoryException}.
+ */
+public class TestOutOfMemoryForValueVector {
+
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(200); // Start with low memory limit
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void variableWidthVectorAllocateNew() {
+ try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew();
+ }
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void variableWidthVectorAllocateNewCustom() {
+ try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(2342, 234);
+ }
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void fixedWidthVectorAllocateNew() {
+ try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew();
+ }
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void fixedWidthVectorAllocateNewCustom() {
+ try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(2342);
+ }
+ }
+
+ @After
+ public void terminate() {
+ allocator.close();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java
new file mode 100644
index 000000000..23414e9f5
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * This class tests that OversizedAllocationException occurs when a large memory is allocated for a vector.
+ * Typically, arrow allows the allocation of the size of at most Integer.MAX_VALUE, but this might cause OOM in tests.
+ * Thus, the max allocation size is limited to 1 KB in this class. Please see the surefire option in pom.xml.
+ */
+public class TestOversizedAllocationForValueVector {
+
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test(expected = OversizedAllocationException.class)
+ public void testFixedVectorReallocation() {
+ final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator);
+ // edge case 1: buffer size = max value capacity
+ final int expectedValueCapacity = checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 4);
+ try {
+ vector.allocateNew(expectedValueCapacity);
+ assertEquals(expectedValueCapacity, vector.getValueCapacity());
+ vector.reAlloc();
+ assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
+ } finally {
+ vector.close();
+ }
+
+ // common case: value count < max value capacity
+ try {
+ vector.allocateNew(checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 8));
+ vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
+ vector.reAlloc(); // this should throw an IOOB
+ } finally {
+ vector.close();
+ }
+ }
+
+ @Test(expected = OversizedAllocationException.class)
+ public void testBitVectorReallocation() {
+ final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ // edge case 1: buffer size ~ max value capacity
+ final int expectedValueCapacity = 1 << 29;
+ try {
+ vector.allocateNew(expectedValueCapacity);
+ assertEquals(expectedValueCapacity, vector.getValueCapacity());
+ vector.reAlloc();
+ assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
+ } finally {
+ vector.close();
+ }
+
+ // common: value count < MAX_VALUE_ALLOCATION
+ try {
+ vector.allocateNew(expectedValueCapacity);
+ for (int i = 0; i < 3; i++) {
+ vector.reAlloc(); // expand buffer size
+ }
+ assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
+ vector.reAlloc(); // buffer size ~ max allocation
+ assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
+ vector.reAlloc(); // overflow
+ } finally {
+ vector.close();
+ }
+ }
+
+
+ @Test(expected = OversizedAllocationException.class)
+ public void testVariableVectorReallocation() {
+ final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+ // edge case 1: value count = MAX_VALUE_ALLOCATION
+ final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
+ final int expectedOffsetSize = 10;
+ try {
+ vector.allocateNew(expectedAllocationInBytes, 10);
+ assertTrue(expectedOffsetSize <= vector.getValueCapacity());
+ assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
+ vector.reAlloc();
+ assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
+ assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
+ } finally {
+ vector.close();
+ }
+
+ // common: value count < MAX_VALUE_ALLOCATION
+ try {
+ vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
+ vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
+ vector.reAlloc(); // this tests if it overflows
+ } finally {
+ vector.close();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
new file mode 100644
index 000000000..c8965dec3
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+
+import java.time.Duration;
+import java.time.Period;
+
+import org.junit.Test;
+
+public class TestPeriodDuration {
+
+ @Test
+ public void testBasics() {
+ PeriodDuration pd1 = new PeriodDuration(Period.of(1, 2, 3), Duration.ofNanos(123));
+ PeriodDuration pdEq1 = new PeriodDuration(Period.of(1, 2, 3), Duration.ofNanos(123));
+ PeriodDuration pd2 = new PeriodDuration(Period.of(1, 2, 3), Duration.ofNanos(12));
+ PeriodDuration pd3 = new PeriodDuration(Period.of(-1, -2, -3), Duration.ofNanos(-123));
+
+ assertEquals(pd1, pdEq1);
+ assertEquals(pd1.hashCode(), pdEq1.hashCode());
+
+ assertNotEquals(pd1, pd2);
+ assertNotEquals(pd1.hashCode(), pd2.hashCode());
+ assertNotEquals(pd1, pd3);
+ assertNotEquals(pd1.hashCode(), pd3.hashCode());
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
new file mode 100644
index 000000000..e60b87e60
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+
+public class TestSplitAndTransfer {
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ private void populateVarcharVector(final VarCharVector vector, int valueCount, String[] compareArray) {
+ for (int i = 0; i < valueCount; i += 3) {
+ final String s = String.format("%010d", i);
+ vector.set(i, s.getBytes());
+ if (compareArray != null) {
+ compareArray[i] = s;
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ @Test /* VarCharVector */
+ public void test() throws Exception {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+ varCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ final String[] compareArray = new String[valueCount];
+
+ populateVarcharVector(varCharVector, valueCount, compareArray);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ final VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+ final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}};
+
+ for (final int[] startLength : startLengths) {
+ final int start = startLength[0];
+ final int length = startLength[1];
+ tp.splitAndTransfer(start, length);
+ for (int i = 0; i < length; i++) {
+ final boolean expectedSet = ((start + i) % 3) == 0;
+ if (expectedSet) {
+ final byte[] expectedValue = compareArray[start + i].getBytes();
+ assertFalse(newVarCharVector.isNull(i));
+ assertArrayEquals(expectedValue, newVarCharVector.get(i));
+ } else {
+ assertTrue(newVarCharVector.isNull(i));
+ }
+ }
+ newVarCharVector.clear();
+ }
+ }
+ }
+
+ @Test
+ public void testMemoryConstrainedTransfer() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+ allocator.setLimit(32768); /* set limit of 32KB */
+
+ varCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 1000;
+
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ final VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+ final int[][] startLengths = {{0, 700}, {700, 299}};
+
+ for (final int[] startLength : startLengths) {
+ final int start = startLength[0];
+ final int length = startLength[1];
+ tp.splitAndTransfer(start, length);
+ newVarCharVector.clear();
+ }
+ }
+ }
+
+ @Test
+ public void testTransfer() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+ varCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ final String[] compareArray = new String[valueCount];
+ populateVarcharVector(varCharVector, valueCount, compareArray);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ final VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+ tp.transfer();
+
+ assertEquals(0, varCharVector.valueCount);
+ assertEquals(valueCount, newVarCharVector.valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ final boolean expectedSet = (i % 3) == 0;
+ if (expectedSet) {
+ final byte[] expectedValue = compareArray[i].getBytes();
+ assertFalse(newVarCharVector.isNull(i));
+ assertArrayEquals(expectedValue, newVarCharVector.get(i));
+ } else {
+ assertTrue(newVarCharVector.isNull(i));
+ }
+ }
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testCopyValueSafe() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+ varCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ // new vector memory is not pre-allocated, we expect copyValueSafe work fine.
+ for (int i = 0; i < valueCount; i++) {
+ tp.copyValueSafe(i, i);
+ }
+ newVarCharVector.setValueCount(valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ final boolean expectedSet = (i % 3) == 0;
+ if (expectedSet) {
+ assertFalse(varCharVector.isNull(i));
+ assertFalse(newVarCharVector.isNull(i));
+ assertArrayEquals(varCharVector.get(i), newVarCharVector.get(i));
+ } else {
+ assertTrue(newVarCharVector.isNull(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferNon() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(0, newVarCharVector.getValueCount());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferAll() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+
+ tp.splitAndTransfer(0, valueCount);
+ assertEquals(valueCount, newVarCharVector.getValueCount());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testInvalidStartIndex() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> tp.splitAndTransfer(valueCount, 10));
+
+ assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testInvalidLength() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> tp.splitAndTransfer(0, valueCount * 2));
+
+ assertEquals("Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testZeroStartIndexAndLength() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+
+ varCharVector.allocateNew(0, 0);
+ final int valueCount = 0;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newVarCharVector.getValueCount());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testZeroLength() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ tp.splitAndTransfer(500, 0);
+ assertEquals(0, newVarCharVector.getValueCount());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testUnionVectorZeroStartIndexAndLength() {
+ try (final UnionVector unionVector = UnionVector.empty("myvector", allocator);
+ final UnionVector newUnionVector = UnionVector.empty("newvector", allocator)) {
+
+ unionVector.allocateNew();
+ final int valueCount = 0;
+ unionVector.setValueCount(valueCount);
+
+ final TransferPair tp = unionVector.makeTransferPair(newUnionVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newUnionVector.getValueCount());
+
+ newUnionVector.clear();
+ }
+ }
+
+ @Test
+ public void testFixedWidthVectorZeroStartIndexAndLength() {
+ try (final IntVector intVector = new IntVector("myvector", allocator);
+ final IntVector newIntVector = new IntVector("newvector", allocator)) {
+
+ intVector.allocateNew(0);
+ final int valueCount = 0;
+ intVector.setValueCount(valueCount);
+
+ final TransferPair tp = intVector.makeTransferPair(newIntVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newIntVector.getValueCount());
+
+ newIntVector.clear();
+ }
+ }
+
+ @Test
+ public void testBitVectorZeroStartIndexAndLength() {
+ try (final BitVector bitVector = new BitVector("myvector", allocator);
+ final BitVector newBitVector = new BitVector("newvector", allocator)) {
+
+ bitVector.allocateNew(0);
+ final int valueCount = 0;
+ bitVector.setValueCount(valueCount);
+
+ final TransferPair tp = bitVector.makeTransferPair(newBitVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newBitVector.getValueCount());
+
+ newBitVector.clear();
+ }
+ }
+
+ @Test
+ public void testFixedSizeListVectorZeroStartIndexAndLength() {
+ try (final FixedSizeListVector listVector = FixedSizeListVector.empty("list", 4, allocator);
+ final FixedSizeListVector newListVector = FixedSizeListVector.empty("newList", 4, allocator)) {
+
+ listVector.allocateNew();
+ final int valueCount = 0;
+ listVector.setValueCount(valueCount);
+
+ final TransferPair tp = listVector.makeTransferPair(newListVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newListVector.getValueCount());
+
+ newListVector.clear();
+ }
+ }
+
+ @Test
+ public void testListVectorZeroStartIndexAndLength() {
+ try (final ListVector listVector = ListVector.empty("list", allocator);
+ final ListVector newListVector = ListVector.empty("newList", allocator)) {
+
+ listVector.allocateNew();
+ final int valueCount = 0;
+ listVector.setValueCount(valueCount);
+
+ final TransferPair tp = listVector.makeTransferPair(newListVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newListVector.getValueCount());
+
+ newListVector.clear();
+ }
+ }
+
+ @Test
+ public void testStructVectorZeroStartIndexAndLength() {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("k1", "v1");
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
+ try (final StructVector structVector = new StructVector("structvec", allocator, type, null);
+ final StructVector newStructVector = new StructVector("newStructvec", allocator, type, null)) {
+
+ structVector.allocateNew();
+ final int valueCount = 0;
+ structVector.setValueCount(valueCount);
+
+ final TransferPair tp = structVector.makeTransferPair(newStructVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newStructVector.getValueCount());
+
+ newStructVector.clear();
+ }
+ }
+
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
new file mode 100644
index 000000000..734ff4631
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.*;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.holders.ComplexHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestStructVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testFieldMetadata() throws Exception {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("k1", "v1");
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+ Assert.assertEquals(vector.getField().getMetadata(), type.getMetadata());
+ }
+ }
+
+ @Test
+ public void testMakeTransferPair() {
+ try (final StructVector s1 = StructVector.empty("s1", allocator);
+ final StructVector s2 = StructVector.empty("s2", allocator)) {
+ s1.addOrGet("struct_child", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+ s1.makeTransferPair(s2);
+ final FieldVector child = s1.getChild("struct_child");
+ final FieldVector toChild = s2.addOrGet("struct_child", child.getField().getFieldType(), child.getClass());
+ assertEquals(0, toChild.getValueCapacity());
+ assertEquals(0, toChild.getDataBuffer().capacity());
+ assertEquals(0, toChild.getValidityBuffer().capacity());
+ }
+ }
+
+ @Test
+ public void testAllocateAfterReAlloc() throws Exception {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("k1", "v1");
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+ MinorType childtype = MinorType.INT;
+ vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class);
+
+ /*
+ * Allocate the default size, and then, reAlloc. This should double the allocation.
+ */
+ vector.allocateNewSafe(); // Initial allocation
+ vector.reAlloc(); // Double the allocation size of self, and all children.
+ long savedValidityBufferCapacity = vector.getValidityBuffer().capacity();
+ int savedValueCapacity = vector.getValueCapacity();
+
+ /*
+ * Clear and allocate again.
+ */
+ vector.clear();
+ vector.allocateNewSafe();
+
+ /*
+ * Verify that the buffer sizes haven't changed.
+ */
+ Assert.assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity);
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testReadNullValue() {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("k1", "v1");
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+ MinorType childtype = MinorType.INT;
+ vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class);
+ vector.setValueCount(2);
+
+ IntVector intVector = (IntVector) vector.getChild("intchild");
+ intVector.setSafe(0, 100);
+ vector.setIndexDefined(0);
+ intVector.setNull(1);
+ vector.setNull(1);
+
+ ComplexHolder holder = new ComplexHolder();
+ vector.get(0, holder);
+ assertNotEquals(0, holder.isSet);
+ assertNotNull(holder.reader);
+
+ vector.get(1, holder);
+ assertEquals(0, holder.isSet);
+ assertNull(holder.reader);
+ }
+ }
+
+ @Test
+ public void testGetPrimitiveVectors() {
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, null);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+
+ // add list vector
+ vector.addOrGet("list", FieldType.nullable(MinorType.LIST.getType()), ListVector.class);
+ ListVector listVector = vector.addOrGetList("list");
+ listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+ // add union vector
+ vector.addOrGet("union", FieldType.nullable(MinorType.UNION.getType()), UnionVector.class);
+ UnionVector unionVector = vector.addOrGetUnion("union");
+ unionVector.addVector(new BigIntVector("bigInt", allocator));
+ unionVector.addVector(new SmallIntVector("smallInt", allocator));
+
+ // add varchar vector
+ vector.addOrGet("varchar", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class);
+
+ List<ValueVector> primitiveVectors = vector.getPrimitiveVectors();
+ assertEquals(4, primitiveVectors.size());
+ assertEquals(MinorType.INT, primitiveVectors.get(0).getMinorType());
+ assertEquals(MinorType.BIGINT, primitiveVectors.get(1).getMinorType());
+ assertEquals(MinorType.SMALLINT, primitiveVectors.get(2).getMinorType());
+ assertEquals(MinorType.VARCHAR, primitiveVectors.get(3).getMinorType());
+ }
+ }
+
+ @Test
+ public void testAddOrGetComplexChildVectors() {
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, null);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+
+ vector.addOrGetList("list");
+ vector.addOrGetFixedSizeList("fixedList", 2);
+ vector.addOrGetUnion("union");
+ vector.addOrGetStruct("struct");
+ vector.addOrGetMap("map", true);
+
+ List<FieldVector> childrens = vector.getChildrenFromFields();
+ assertEquals(5, childrens.size());
+ assertEquals(MinorType.LIST, childrens.get(0).getMinorType());
+ assertEquals(MinorType.FIXED_SIZE_LIST, childrens.get(1).getMinorType());
+ assertEquals(MinorType.UNION, childrens.get(2).getMinorType());
+ assertEquals(MinorType.STRUCT, childrens.get(3).getMinorType());
+ assertEquals(MinorType.MAP, childrens.get(4).getMinorType());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java
new file mode 100644
index 000000000..97930f433
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.Test;
+
+public class TestTypeLayout {
+
+ @Test
+ public void testTypeBufferCount() {
+ ArrowType type = new ArrowType.Int(8, true);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Union(UnionMode.Sparse, new int[2]);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Union(UnionMode.Dense, new int[1]);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Struct();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.List();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.FixedSizeList(5);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Map(false);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Decimal(10, 10, 128);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Decimal(10, 10, 256);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+
+ type = new ArrowType.FixedSizeBinary(5);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Bool();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Binary();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Utf8();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Null();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Date(DateUnit.DAY);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Interval(IntervalUnit.DAY_TIME);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Duration(TimeUnit.MILLISECOND);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
new file mode 100644
index 000000000..f04998915
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
@@ -0,0 +1,520 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.VectorWithOrdinal;
+import org.apache.arrow.vector.complex.impl.UnionWriter;
+import org.apache.arrow.vector.holders.NullableBitHolder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestUnionVector {
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testUnionVector() throws Exception {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 100;
+ uInt4Holder.isSet = 1;
+
+ try (UnionVector unionVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ unionVector.allocateNew();
+
+ // write some data
+ unionVector.setType(0, MinorType.UINT4);
+ unionVector.setSafe(0, uInt4Holder);
+ unionVector.setType(2, MinorType.UINT4);
+ unionVector.setSafe(2, uInt4Holder);
+ unionVector.setValueCount(4);
+
+ // check that what we wrote is correct
+ assertEquals(4, unionVector.getValueCount());
+
+ assertEquals(false, unionVector.isNull(0));
+ assertEquals(100, unionVector.getObject(0));
+
+ assertNull(unionVector.getObject(1));
+
+ assertEquals(false, unionVector.isNull(2));
+ assertEquals(100, unionVector.getObject(2));
+
+ assertNull(unionVector.getObject(3));
+ }
+ }
+
+ @Test
+ public void testUnionVectorMapValue() throws Exception {
+ try (UnionVector unionVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ unionVector.allocateNew();
+
+ UnionWriter writer = (UnionWriter) unionVector.getWriter();
+
+ // populate map vector with the following two records
+ // [
+ // null,
+ // [[1: 2], [3: 4], [5: null]]
+ // ]
+
+ writer.setPosition(0);
+ writer.writeNull();
+
+ writer.setPosition(1);
+ writer.startMap();
+
+ writer.startEntry();
+ writer.key().integer().writeInt(1);
+ writer.value().integer().writeInt(2);
+ writer.endEntry();
+
+ writer.startEntry();
+ writer.key().integer().writeInt(3);
+ writer.value().integer().writeInt(4);
+ writer.endEntry();
+
+ writer.startEntry();
+ writer.key().integer().writeInt(5);
+ writer.endEntry();
+
+ writer.endMap();
+
+ unionVector.setValueCount(2);
+
+ // check that what we wrote is correct
+ assertEquals(2, unionVector.getValueCount());
+
+ // first entry
+ assertNull(unionVector.getObject(0));
+
+ // second entry
+ List<Map<String, Integer>> resultList = (List<Map<String, Integer>>) unionVector.getObject(1);
+ assertEquals(3, resultList.size());
+
+ Map<String, Integer> resultMap = resultList.get(0);
+ assertEquals(1, (int) resultMap.get(MapVector.KEY_NAME));
+ assertEquals(2, (int) resultMap.get(MapVector.VALUE_NAME));
+
+ resultMap = resultList.get(1);
+ assertEquals(3, (int) resultMap.get(MapVector.KEY_NAME));
+ assertEquals(4, (int) resultMap.get(MapVector.VALUE_NAME));
+
+ resultMap = resultList.get(2);
+ assertEquals(5, (int) resultMap.get(MapVector.KEY_NAME));
+ assertNull(resultMap.get(MapVector.VALUE_NAME));
+ }
+ }
+
+ @Test
+ public void testTransfer() throws Exception {
+ try (UnionVector srcVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ srcVector.allocateNew();
+
+ // write some data
+ srcVector.setType(0, MinorType.INT);
+ srcVector.setSafe(0, newIntHolder(5));
+ srcVector.setType(1, MinorType.BIT);
+ srcVector.setSafe(1, newBitHolder(false));
+ srcVector.setType(3, MinorType.INT);
+ srcVector.setSafe(3, newIntHolder(10));
+ srcVector.setType(5, MinorType.BIT);
+ srcVector.setSafe(5, newBitHolder(false));
+ srcVector.setValueCount(6);
+
+ try (UnionVector destVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ TransferPair pair = srcVector.makeTransferPair(destVector);
+
+ // Creating the transfer should transfer the type of the field at least.
+ assertEquals(srcVector.getField(), destVector.getField());
+
+ // transfer
+ pair.transfer();
+
+ assertEquals(srcVector.getField(), destVector.getField());
+
+ // now check the values are transferred
+ assertEquals(6, destVector.getValueCount());
+
+ assertFalse(destVector.isNull(0));
+ assertEquals(5, destVector.getObject(0));
+
+ assertFalse(destVector.isNull(1));
+ assertEquals(false, destVector.getObject(1));
+
+ assertNull(destVector.getObject(2));
+
+ assertFalse(destVector.isNull(3));
+ assertEquals(10, destVector.getObject(3));
+
+ assertNull(destVector.getObject(4));
+
+ assertFalse(destVector.isNull(5));
+ assertEquals(false, destVector.getObject(5));
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (UnionVector sourceVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+
+ sourceVector.allocateNew();
+
+ /* populate the UnionVector */
+ sourceVector.setType(0, MinorType.INT);
+ sourceVector.setSafe(0, newIntHolder(5));
+ sourceVector.setType(1, MinorType.INT);
+ sourceVector.setSafe(1, newIntHolder(10));
+ sourceVector.setType(2, MinorType.INT);
+ sourceVector.setSafe(2, newIntHolder(15));
+ sourceVector.setType(3, MinorType.INT);
+ sourceVector.setSafe(3, newIntHolder(20));
+ sourceVector.setType(4, MinorType.INT);
+ sourceVector.setSafe(4, newIntHolder(25));
+ sourceVector.setType(5, MinorType.INT);
+ sourceVector.setSafe(5, newIntHolder(30));
+ sourceVector.setType(6, MinorType.INT);
+ sourceVector.setSafe(6, newIntHolder(35));
+ sourceVector.setType(7, MinorType.INT);
+ sourceVector.setSafe(7, newIntHolder(40));
+ sourceVector.setType(8, MinorType.INT);
+ sourceVector.setSafe(8, newIntHolder(45));
+ sourceVector.setType(9, MinorType.INT);
+ sourceVector.setSafe(9, newIntHolder(50));
+ sourceVector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, sourceVector.getValueCount());
+ assertEquals(false, sourceVector.isNull(0));
+ assertEquals(5, sourceVector.getObject(0));
+ assertEquals(false, sourceVector.isNull(1));
+ assertEquals(10, sourceVector.getObject(1));
+ assertEquals(false, sourceVector.isNull(2));
+ assertEquals(15, sourceVector.getObject(2));
+ assertEquals(false, sourceVector.isNull(3));
+ assertEquals(20, sourceVector.getObject(3));
+ assertEquals(false, sourceVector.isNull(4));
+ assertEquals(25, sourceVector.getObject(4));
+ assertEquals(false, sourceVector.isNull(5));
+ assertEquals(30, sourceVector.getObject(5));
+ assertEquals(false, sourceVector.isNull(6));
+ assertEquals(35, sourceVector.getObject(6));
+ assertEquals(false, sourceVector.isNull(7));
+ assertEquals(40, sourceVector.getObject(7));
+ assertEquals(false, sourceVector.isNull(8));
+ assertEquals(45, sourceVector.getObject(8));
+ assertEquals(false, sourceVector.isNull(9));
+ assertEquals(50, sourceVector.getObject(9));
+
+ try (UnionVector toVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 3},
+ {3, 1},
+ {4, 2},
+ {6, 1},
+ {7, 1},
+ {8, 2}
+ };
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing the splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i),
+ toVector.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferWithMixedVectors() throws Exception {
+ try (UnionVector sourceVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+
+ sourceVector.allocateNew();
+
+ /* populate the UnionVector */
+ sourceVector.setType(0, MinorType.INT);
+ sourceVector.setSafe(0, newIntHolder(5));
+
+ sourceVector.setType(1, MinorType.FLOAT4);
+ sourceVector.setSafe(1, newFloat4Holder(5.5f));
+
+ sourceVector.setType(2, MinorType.INT);
+ sourceVector.setSafe(2, newIntHolder(10));
+
+ sourceVector.setType(3, MinorType.FLOAT4);
+ sourceVector.setSafe(3, newFloat4Holder(10.5f));
+
+ sourceVector.setType(4, MinorType.INT);
+ sourceVector.setSafe(4, newIntHolder(15));
+
+ sourceVector.setType(5, MinorType.FLOAT4);
+ sourceVector.setSafe(5, newFloat4Holder(15.5f));
+
+ sourceVector.setType(6, MinorType.INT);
+ sourceVector.setSafe(6, newIntHolder(20));
+
+ sourceVector.setType(7, MinorType.FLOAT4);
+ sourceVector.setSafe(7, newFloat4Holder(20.5f));
+
+ sourceVector.setType(8, MinorType.INT);
+ sourceVector.setSafe(8, newIntHolder(30));
+
+ sourceVector.setType(9, MinorType.FLOAT4);
+ sourceVector.setSafe(9, newFloat4Holder(30.5f));
+ sourceVector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, sourceVector.getValueCount());
+ assertEquals(false, sourceVector.isNull(0));
+ assertEquals(5, sourceVector.getObject(0));
+ assertEquals(false, sourceVector.isNull(1));
+ assertEquals(5.5f, sourceVector.getObject(1));
+ assertEquals(false, sourceVector.isNull(2));
+ assertEquals(10, sourceVector.getObject(2));
+ assertEquals(false, sourceVector.isNull(3));
+ assertEquals(10.5f, sourceVector.getObject(3));
+ assertEquals(false, sourceVector.isNull(4));
+ assertEquals(15, sourceVector.getObject(4));
+ assertEquals(false, sourceVector.isNull(5));
+ assertEquals(15.5f, sourceVector.getObject(5));
+ assertEquals(false, sourceVector.isNull(6));
+ assertEquals(20, sourceVector.getObject(6));
+ assertEquals(false, sourceVector.isNull(7));
+ assertEquals(20.5f, sourceVector.getObject(7));
+ assertEquals(false, sourceVector.isNull(8));
+ assertEquals(30, sourceVector.getObject(8));
+ assertEquals(false, sourceVector.isNull(9));
+ assertEquals(30.5f, sourceVector.getObject(9));
+
+ try (UnionVector toVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 2},
+ {2, 1},
+ {3, 2},
+ {5, 3},
+ {8, 2}
+ };
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing the splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testGetFieldTypeInfo() throws Exception {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+
+ int[] typeIds = new int[2];
+ typeIds[0] = MinorType.INT.ordinal();
+ typeIds[1] = MinorType.VARCHAR.ordinal();
+
+ List<Field> children = new ArrayList<>();
+ children.add(new Field("int", FieldType.nullable(MinorType.INT.getType()), null));
+ children.add(new Field("varchar", FieldType.nullable(MinorType.VARCHAR.getType()), null));
+
+ final FieldType fieldType = new FieldType(false, new ArrowType.Union(UnionMode.Sparse, typeIds),
+ /*dictionary=*/null, metadata);
+ final Field field = new Field("union", fieldType, children);
+
+ MinorType minorType = MinorType.UNION;
+ UnionVector vector = (UnionVector) minorType.getNewVector(field, allocator, null);
+ vector.initializeChildrenFromFields(children);
+
+ assertTrue(vector.getField().equals(field));
+
+ // Union has 2 child vectors
+ assertEquals(vector.size(), 2);
+
+ // Check child field 0
+ VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
+ assertEquals(intChild.ordinal, 0);
+ assertEquals(intChild.vector.getField(), children.get(0));
+
+ // Check child field 1
+ VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
+ assertEquals(varcharChild.ordinal, 1);
+ assertEquals(varcharChild.vector.getField(), children.get(1));
+ }
+
+ @Test
+ public void testGetBufferAddress() throws Exception {
+ try (UnionVector vector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ boolean error = false;
+
+ vector.allocateNew();
+
+ /* populate the UnionVector */
+ vector.setType(0, MinorType.INT);
+ vector.setSafe(0, newIntHolder(5));
+
+ vector.setType(1, MinorType.FLOAT4);
+ vector.setSafe(1, newFloat4Holder(5.5f));
+
+ vector.setType(2, MinorType.INT);
+ vector.setSafe(2, newIntHolder(10));
+
+ vector.setType(3, MinorType.FLOAT4);
+ vector.setSafe(3, newFloat4Holder(10.5f));
+
+ vector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, vector.getValueCount());
+ assertEquals(false, vector.isNull(0));
+ assertEquals(5, vector.getObject(0));
+ assertEquals(false, vector.isNull(1));
+ assertEquals(5.5f, vector.getObject(1));
+ assertEquals(false, vector.isNull(2));
+ assertEquals(10, vector.getObject(2));
+ assertEquals(false, vector.isNull(3));
+ assertEquals(10.5f, vector.getObject(3));
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+
+
+ try {
+ long offsetAddress = vector.getOffsetBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ try {
+ long dataAddress = vector.getDataBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(1, buffers.size());
+ }
+ }
+
+ @Test
+ public void testSetGetNull() {
+ try (UnionVector srcVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ srcVector.allocateNew();
+
+ final NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = 5;
+
+ // write some data
+ srcVector.setType(0, MinorType.INT);
+ srcVector.setSafe(0, holder);
+
+ assertFalse(srcVector.isNull(0));
+
+ holder.isSet = 0;
+ srcVector.setSafe(0, holder);
+
+ assertNull(srcVector.getObject(0));
+ }
+ }
+
+ private static NullableIntHolder newIntHolder(int value) {
+ final NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = value;
+ return holder;
+ }
+
+ private static NullableBitHolder newBitHolder(boolean value) {
+ final NullableBitHolder holder = new NullableBitHolder();
+ holder.isSet = 1;
+ holder.value = value ? 1 : 0;
+ return holder;
+ }
+
+ private static NullableFloat4Holder newFloat4Holder(float value) {
+ final NullableFloat4Holder holder = new NullableFloat4Holder();
+ holder.isSet = 1;
+ holder.value = value;
+ return holder;
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
new file mode 100644
index 000000000..7e64dd386
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+public class TestUtils {
+
+ public static VarCharVector newVarCharVector(String name, BufferAllocator allocator) {
+ return (VarCharVector)
+ FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector(name, allocator, null);
+ }
+
+ public static VarBinaryVector newVarBinaryVector(String name, BufferAllocator allocator) {
+ return (VarBinaryVector)
+ FieldType.nullable(new ArrowType.Binary()).createNewSingleVector(name, allocator, null);
+ }
+
+ public static <T> T newVector(Class<T> c, String name, ArrowType type, BufferAllocator allocator) {
+ return c.cast(FieldType.nullable(type).createNewSingleVector(name, allocator, null));
+ }
+
+ public static <T> T newVector(Class<T> c, String name, MinorType type, BufferAllocator allocator) {
+ return c.cast(FieldType.nullable(type.getType()).createNewSingleVector(name, allocator, null));
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
new file mode 100644
index 000000000..572c3d594
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -0,0 +1,3061 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.TestUtils.newVarBinaryVector;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.holders.NullableVarBinaryHolder;
+import org.apache.arrow.vector.holders.NullableVarCharHolder;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValueVector {
+
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ private static final Charset utf8Charset = Charset.forName("UTF-8");
+ private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
+ private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
+ private static final byte[] STR4 = "DDDDDDDD4".getBytes(utf8Charset);
+ private static final byte[] STR5 = "EEE5".getBytes(utf8Charset);
+ private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset);
+ private static final int MAX_VALUE_COUNT =
+ (int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7);
+ private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2);
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ /*
+ * Tests for Fixed-Width vectors
+ *
+ * Covered types as of now
+ *
+ * -- UInt4Vector
+ * -- IntVector
+ * -- Float4Vector
+ * -- Float8Vector
+ *
+ * -- UInt4Vector
+ * -- IntVector
+ * -- Float4Vector
+ *
+ * TODO:
+ *
+ * -- SmallIntVector
+ * -- BigIntVector
+ * -- TinyIntVector
+ */
+
+ @Test /* UInt4Vector */
+ public void testFixedType1() {
+
+ // Create a new value vector for 1024 integers.
+ try (final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ boolean error = false;
+ int initialCapacity = 0;
+
+ vector.allocateNew(1024);
+ initialCapacity = vector.getValueCapacity();
+ assertTrue(initialCapacity >= 1024);
+
+ // Put and set a few values
+ vector.setSafe(0, 100);
+ vector.setSafe(1, 101);
+ vector.setSafe(100, 102);
+ vector.setSafe(1022, 103);
+ vector.setSafe(1023, 104);
+
+ assertEquals(100, vector.get(0));
+ assertEquals(101, vector.get(1));
+ assertEquals(102, vector.get(100));
+ assertEquals(103, vector.get(1022));
+ assertEquals(104, vector.get(1023));
+
+ try {
+ vector.set(initialCapacity, 10000);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ try {
+ vector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ vector.setSafe(initialCapacity, 10000);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ /* check vector data after realloc */
+ assertEquals(100, vector.get(0));
+ assertEquals(101, vector.get(1));
+ assertEquals(102, vector.get(100));
+ assertEquals(103, vector.get(1022));
+ assertEquals(104, vector.get(1023));
+ assertEquals(10000, vector.get(initialCapacity));
+
+ /* reset the vector */
+ int capacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should have been zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ // TODO: test vector.get(i) is 0 after unsafe get added
+ assertEquals("non-zero data not expected at index: " + i, true, vector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* IntVector */
+ public void testFixedType2() {
+ try (final IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ intVector.setInitialCapacity(MAX_VALUE_COUNT - 1);
+ intVector.setInitialCapacity(MAX_VALUE_COUNT);
+
+ try {
+ intVector.setInitialCapacity(MAX_VALUE_COUNT * 2);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ intVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, intVector.getValueCapacity());
+
+ /* allocate 64 bytes (16 * 4) */
+ intVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(intVector.getValueCapacity() >= initialCapacity);
+ initialCapacity = intVector.getValueCapacity();
+
+ /* populate the vector */
+ int j = 1;
+ for (int i = 0; i < initialCapacity; i += 2) {
+ intVector.set(i, j);
+ j++;
+ }
+
+ try {
+ intVector.set(initialCapacity, j);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents */
+ j = 1;
+ for (int i = 0; i < initialCapacity; i += 2) {
+ assertEquals("unexpected value at index: " + i, j, intVector.get(i));
+ j++;
+ }
+
+ try {
+ intVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ intVector.setSafe(initialCapacity, j);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(intVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ j = 1;
+ for (int i = 0; i <= initialCapacity; i += 2) {
+ assertEquals("unexpected value at index: " + i, j, intVector.get(i));
+ j++;
+ }
+
+ /* reset the vector */
+ int capacityBeforeRealloc = intVector.getValueCapacity();
+ intVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeRealloc, intVector.getValueCapacity());
+
+ /* vector data should have been zeroed out */
+ for (int i = 0; i < capacityBeforeRealloc; i++) {
+ assertEquals("non-zero data not expected at index: " + i, true, intVector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* VarCharVector */
+ public void testSizeOfValueBuffer() {
+ try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ int valueCount = 100;
+ int currentSize = 0;
+ vector.setInitialCapacity(valueCount);
+ vector.allocateNew();
+ vector.setValueCount(valueCount);
+ for (int i = 0; i < valueCount; i++) {
+ currentSize += i;
+ vector.setSafe(i, new byte[i]);
+ }
+
+ assertEquals(currentSize, vector.sizeOfValueBuffer());
+ }
+ }
+
+ @Test /* Float4Vector */
+ public void testFixedType3() {
+ try (final Float4Vector floatVector = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1);
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT);
+
+ try {
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT * 2);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ floatVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, floatVector.getValueCapacity());
+
+ /* allocate 64 bytes (16 * 4) */
+ floatVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity);
+ initialCapacity = floatVector.getValueCapacity();
+
+ floatVector.zeroVector();
+
+ /* populate the floatVector */
+ floatVector.set(0, 1.5f);
+ floatVector.set(2, 2.5f);
+ floatVector.set(4, 3.3f);
+ floatVector.set(6, 4.8f);
+ floatVector.set(8, 5.6f);
+ floatVector.set(10, 6.6f);
+ floatVector.set(12, 7.8f);
+ floatVector.set(14, 8.5f);
+
+ try {
+ floatVector.set(initialCapacity, 9.5f);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents */
+ assertEquals(1.5f, floatVector.get(0), 0);
+ assertEquals(2.5f, floatVector.get(2), 0);
+ assertEquals(3.3f, floatVector.get(4), 0);
+ assertEquals(4.8f, floatVector.get(6), 0);
+ assertEquals(5.6f, floatVector.get(8), 0);
+ assertEquals(6.6f, floatVector.get(10), 0);
+ assertEquals(7.8f, floatVector.get(12), 0);
+ assertEquals(8.5f, floatVector.get(14), 0);
+
+ try {
+ floatVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ floatVector.setSafe(initialCapacity, 9.5f);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ assertEquals(1.5f, floatVector.get(0), 0);
+ assertEquals(2.5f, floatVector.get(2), 0);
+ assertEquals(3.3f, floatVector.get(4), 0);
+ assertEquals(4.8f, floatVector.get(6), 0);
+ assertEquals(5.6f, floatVector.get(8), 0);
+ assertEquals(6.6f, floatVector.get(10), 0);
+ assertEquals(7.8f, floatVector.get(12), 0);
+ assertEquals(8.5f, floatVector.get(14), 0);
+ assertEquals(9.5f, floatVector.get(initialCapacity), 0);
+
+ /* reset the vector */
+ int capacityBeforeReset = floatVector.getValueCapacity();
+ floatVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, floatVector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* Float8Vector */
+ public void testFixedType4() {
+ try (final Float8Vector floatVector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE - 1);
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE);
+
+ try {
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE * 2);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ floatVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, floatVector.getValueCapacity());
+
+ /* allocate 128 bytes (16 * 8) */
+ floatVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity);
+ initialCapacity = floatVector.getValueCapacity();
+
+ /* populate the vector */
+ floatVector.set(0, 1.55);
+ floatVector.set(2, 2.53);
+ floatVector.set(4, 3.36);
+ floatVector.set(6, 4.82);
+ floatVector.set(8, 5.67);
+ floatVector.set(10, 6.67);
+ floatVector.set(12, 7.87);
+ floatVector.set(14, 8.56);
+
+ try {
+ floatVector.set(initialCapacity, 9.53);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check floatVector contents */
+ assertEquals(1.55, floatVector.get(0), 0);
+ assertEquals(2.53, floatVector.get(2), 0);
+ assertEquals(3.36, floatVector.get(4), 0);
+ assertEquals(4.82, floatVector.get(6), 0);
+ assertEquals(5.67, floatVector.get(8), 0);
+ assertEquals(6.67, floatVector.get(10), 0);
+ assertEquals(7.87, floatVector.get(12), 0);
+ assertEquals(8.56, floatVector.get(14), 0);
+
+ try {
+ floatVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ floatVector.setSafe(initialCapacity, 9.53);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ assertEquals(1.55, floatVector.get(0), 0);
+ assertEquals(2.53, floatVector.get(2), 0);
+ assertEquals(3.36, floatVector.get(4), 0);
+ assertEquals(4.82, floatVector.get(6), 0);
+ assertEquals(5.67, floatVector.get(8), 0);
+ assertEquals(6.67, floatVector.get(10), 0);
+ assertEquals(7.87, floatVector.get(12), 0);
+ assertEquals(8.56, floatVector.get(14), 0);
+ assertEquals(9.53, floatVector.get(initialCapacity), 0);
+
+ /* reset the vector */
+ int capacityBeforeReset = floatVector.getValueCapacity();
+ floatVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, floatVector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* UInt4Vector */
+ public void testNullableFixedType1() {
+
+ // Create a new value vector for 1024 integers.
+ try (final UInt4Vector vector = newVector(UInt4Vector.class, EMPTY_SCHEMA_PATH, new ArrowType.Int(32, false),
+ allocator);) {
+ boolean error = false;
+ int initialCapacity = 1024;
+
+ vector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet */
+ assertEquals(0, vector.getValueCapacity());
+
+ vector.allocateNew();
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ // Put and set a few values
+ vector.set(0, 100);
+ vector.set(1, 101);
+ vector.set(100, 102);
+ vector.set(initialCapacity - 2, 103);
+ vector.set(initialCapacity - 1, 104);
+
+ /* check vector contents */
+ assertEquals(100, vector.get(0));
+ assertEquals(101, vector.get(1));
+ assertEquals(102, vector.get(100));
+ assertEquals(103, vector.get(initialCapacity - 2));
+ assertEquals(104, vector.get(initialCapacity - 1));
+
+ int val = 0;
+
+ /* check unset bits/null values */
+ for (int i = 2, j = 101; i <= 99 || j <= initialCapacity - 3; i++, j++) {
+ if (i <= 99) {
+ assertTrue(vector.isNull(i));
+ }
+ if (j <= initialCapacity - 3) {
+ assertTrue(vector.isNull(j));
+ }
+ }
+
+ try {
+ vector.set(initialCapacity, 10000);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ try {
+ vector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* should trigger a realloc of the underlying bitvector and valuevector */
+ vector.setSafe(initialCapacity, 10000);
+
+ /* check new capacity */
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector contents should still be intact after realloc */
+ assertEquals(100, vector.get(0));
+ assertEquals(101, vector.get(1));
+ assertEquals(102, vector.get(100));
+ assertEquals(103, vector.get(initialCapacity - 2));
+ assertEquals(104, vector.get(initialCapacity - 1));
+ assertEquals(10000, vector.get(initialCapacity));
+
+ val = 0;
+
+ /* check unset bits/null values */
+ for (int i = 2, j = 101; i < 99 || j < initialCapacity - 3; i++, j++) {
+ if (i <= 99) {
+ assertTrue(vector.isNull(i));
+ }
+ if (j <= initialCapacity - 3) {
+ assertTrue(vector.isNull(j));
+ }
+ }
+
+ /* reset the vector */
+ int capacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* Float4Vector */
+ public void testNullableFixedType2() {
+ // Create a new value vector for 1024 integers
+ try (final Float4Vector vector = newVector(Float4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator);) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ vector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet */
+ assertEquals(0, vector.getValueCapacity());
+
+ vector.allocateNew();
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ /* populate the vector */
+ vector.set(0, 100.5f);
+ vector.set(2, 201.5f);
+ vector.set(4, 300.3f);
+ vector.set(6, 423.8f);
+ vector.set(8, 555.6f);
+ vector.set(10, 66.6f);
+ vector.set(12, 78.8f);
+ vector.set(14, 89.5f);
+
+ try {
+ vector.set(initialCapacity, 90.5f);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents */
+ assertEquals(100.5f, vector.get(0), 0);
+ assertTrue(vector.isNull(1));
+ assertEquals(201.5f, vector.get(2), 0);
+ assertTrue(vector.isNull(3));
+ assertEquals(300.3f, vector.get(4), 0);
+ assertTrue(vector.isNull(5));
+ assertEquals(423.8f, vector.get(6), 0);
+ assertTrue(vector.isNull(7));
+ assertEquals(555.6f, vector.get(8), 0);
+ assertTrue(vector.isNull(9));
+ assertEquals(66.6f, vector.get(10), 0);
+ assertTrue(vector.isNull(11));
+ assertEquals(78.8f, vector.get(12), 0);
+ assertTrue(vector.isNull(13));
+ assertEquals(89.5f, vector.get(14), 0);
+ assertTrue(vector.isNull(15));
+
+ try {
+ vector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ vector.setSafe(initialCapacity, 90.5f);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ /* vector data should still be intact after realloc */
+ assertEquals(100.5f, vector.get(0), 0);
+ assertTrue(vector.isNull(1));
+ assertEquals(201.5f, vector.get(2), 0);
+ assertTrue(vector.isNull(3));
+ assertEquals(300.3f, vector.get(4), 0);
+ assertTrue(vector.isNull(5));
+ assertEquals(423.8f, vector.get(6), 0);
+ assertTrue(vector.isNull(7));
+ assertEquals(555.6f, vector.get(8), 0);
+ assertTrue(vector.isNull(9));
+ assertEquals(66.6f, vector.get(10), 0);
+ assertTrue(vector.isNull(11));
+ assertEquals(78.8f, vector.get(12), 0);
+ assertTrue(vector.isNull(13));
+ assertEquals(89.5f, vector.get(14), 0);
+ assertTrue(vector.isNull(15));
+
+ /* reset the vector */
+ int capacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* IntVector */
+ public void testNullableFixedType3() {
+ // Create a new value vector for 1024 integers
+ try (final IntVector vector = newVector(IntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, allocator)) {
+ boolean error = false;
+ int initialCapacity = 1024;
+
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, vector.getValueCapacity());
+ /* allocate space for 4KB data (1024 * 4) */
+ vector.allocateNew(initialCapacity);
+ /* underlying buffer should be able to store 1024 values */
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ vector.set(0, 1);
+ vector.set(1, 2);
+ vector.set(100, 3);
+ vector.set(1022, 4);
+ vector.set(1023, 5);
+
+ /* check vector contents */
+ int j = 1;
+ for (int i = 0; i <= 1023; i++) {
+ if ((i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ } else {
+ assertFalse("null data not expected at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, j, vector.get(i));
+ j++;
+ }
+ }
+
+ vector.setValueCount(1024);
+ Field field = vector.getField();
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+
+ assertEquals(2, buffers.size());
+
+ ArrowBuf validityVectorBuf = buffers.get(0);
+
+ /* bitvector tracks 1024 integers --> 1024 bits --> 128 bytes */
+ assertTrue(validityVectorBuf.readableBytes() >= 128);
+ assertEquals(3, validityVectorBuf.getByte(0)); // 1st and second bit defined
+ for (int i = 1; i < 12; i++) {
+ assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined until 100
+ }
+ assertEquals(16, validityVectorBuf.getByte(12)); // 100th bit is defined (12 * 8 + 4)
+ for (int i = 13; i < 127; i++) {
+ assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined between 100th and 1022nd
+ }
+ assertEquals(-64, validityVectorBuf.getByte(127)); // 1022nd and 1023rd bit defined
+
+ /* this should trigger a realloc() */
+ vector.setSafe(initialCapacity, 6);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ /* vector data should still be intact after realloc */
+ j = 1;
+ for (int i = 0; i < (initialCapacity * 2); i++) {
+ if ((i > 1023 && i != initialCapacity) || (i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ } else {
+ assertFalse("null data not expected at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, j, vector.get(i));
+ j++;
+ }
+ }
+
+ /* reset the vector */
+ int capacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should have been zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+
+ vector.allocateNew(initialCapacity * 4);
+ // vector has been erased
+ for (int i = 0; i < initialCapacity * 4; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* IntVector */
+ public void testNullableFixedType4() {
+ try (final IntVector vector = newVector(IntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, allocator)) {
+
+ /* no memory allocation has happened yet */
+ assertEquals(0, vector.getValueCapacity());
+
+ vector.allocateNew();
+ int valueCapacity = vector.getValueCapacity();
+ assertEquals(vector.INITIAL_VALUE_ALLOCATION, valueCapacity);
+
+ int baseValue = 20000;
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, baseValue + i);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i));
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, vector.isNull(i));
+ }
+ }
+
+ vector.setSafe(valueCapacity, 20000000);
+ assertTrue(vector.getValueCapacity() >= valueCapacity * 2);
+
+ for (int i = 0; i < vector.getValueCapacity(); i++) {
+ if (i == valueCapacity) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, 20000000, vector.get(i));
+ } else if (i < valueCapacity) {
+ if ((i & 1) == 1) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i));
+ }
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, vector.isNull(i));
+ }
+ }
+
+ vector.zeroVector();
+
+ for (int i = 0; i < vector.getValueCapacity(); i += 2) {
+ vector.set(i, baseValue + i);
+ }
+
+ for (int i = 0; i < vector.getValueCapacity(); i++) {
+ if (i % 2 == 0) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i));
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, vector.isNull(i));
+ }
+ }
+
+ int valueCapacityBeforeRealloc = vector.getValueCapacity();
+ vector.setSafe(valueCapacityBeforeRealloc + 1000, 400000000);
+ assertTrue(vector.getValueCapacity() >= valueCapacity * 4);
+
+ for (int i = 0; i < vector.getValueCapacity(); i++) {
+ if (i == (valueCapacityBeforeRealloc + 1000)) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, 400000000, vector.get(i));
+ } else if (i < valueCapacityBeforeRealloc && (i % 2) == 0) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, baseValue + i, vector.get(i));
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* reset the vector */
+ int valueCapacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(valueCapacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < valueCapacityBeforeReset; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+ }
+ }
+
+ /*
+ * Tests for Variable Width Vectors
+ *
+ * Covered types as of now
+ *
+ * -- VarCharVector
+ * -- VarBinaryVector
+ *
+ * TODO:
+ *
+ * -- VarCharVector
+ * -- VarBinaryVector
+ */
+
+ /**
+ * ARROW-7831: this checks that a slice taken off a buffer is still readable after that buffer's allocator is closed.
+ */
+ @Test /* VarCharVector */
+ public void testSplitAndTransfer1() {
+ try (final VarCharVector targetVector = newVarCharVector("split-target", allocator)) {
+ try (final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ sourceVector.allocateNew(1024 * 10, 1024);
+
+ sourceVector.set(0, STR1);
+ sourceVector.set(1, STR2);
+ sourceVector.set(2, STR3);
+ sourceVector.setValueCount(3);
+
+ final long allocatedMem = allocator.getAllocatedMemory();
+ final int validityRefCnt = sourceVector.getValidityBuffer().refCnt();
+ final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt();
+ final int dataRefCnt = sourceVector.getDataBuffer().refCnt();
+
+ // split and transfer with slice starting at the beginning: this should not allocate anything new
+ sourceVector.splitAndTransferTo(0, 2, targetVector);
+ assertEquals(allocatedMem, allocator.getAllocatedMemory());
+ // The validity and offset buffers are sliced from a same buffer.See BaseFixedWidthVector#allocateBytes.
+ // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. The refcnt of the
+ // offset buffer is increased as well for the same reason. This amounts to a total of 2.
+ assertEquals(validityRefCnt + 2, sourceVector.getValidityBuffer().refCnt());
+ assertEquals(offsetRefCnt + 2, sourceVector.getOffsetBuffer().refCnt());
+ assertEquals(dataRefCnt + 1, sourceVector.getDataBuffer().refCnt());
+ }
+ assertArrayEquals(STR1, targetVector.get(0));
+ assertArrayEquals(STR2, targetVector.get(1));
+ }
+ }
+
+ /**
+ * ARROW-7831: this checks that a vector that got sliced is still readable after the slice's allocator got closed.
+ */
+ @Test /* VarCharVector */
+ public void testSplitAndTransfer2() {
+ try (final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ try (final VarCharVector targetVector = newVarCharVector("split-target", allocator)) {
+ sourceVector.allocateNew(1024 * 10, 1024);
+
+ sourceVector.set(0, STR1);
+ sourceVector.set(1, STR2);
+ sourceVector.set(2, STR3);
+ sourceVector.setValueCount(3);
+
+ final long allocatedMem = allocator.getAllocatedMemory();
+ final int validityRefCnt = sourceVector.getValidityBuffer().refCnt();
+ final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt();
+ final int dataRefCnt = sourceVector.getDataBuffer().refCnt();
+
+ // split and transfer with slice starting at the beginning: this should not allocate anything new
+ sourceVector.splitAndTransferTo(0, 2, targetVector);
+ assertEquals(allocatedMem, allocator.getAllocatedMemory());
+ // The validity and offset buffers are sliced from a same buffer.See BaseFixedWidthVector#allocateBytes.
+ // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. The refcnt of the
+ // offset buffer is increased as well for the same reason. This amounts to a total of 2.
+ assertEquals(validityRefCnt + 2, sourceVector.getValidityBuffer().refCnt());
+ assertEquals(offsetRefCnt + 2, sourceVector.getOffsetBuffer().refCnt());
+ assertEquals(dataRefCnt + 1, sourceVector.getDataBuffer().refCnt());
+ }
+ assertArrayEquals(STR1, sourceVector.get(0));
+ assertArrayEquals(STR2, sourceVector.get(1));
+ assertArrayEquals(STR3, sourceVector.get(2));
+ }
+ }
+
+ /**
+ * ARROW-7831: this checks an offset splitting optimization, in the case where all the values up to the start of the
+ * slice are null/empty, which avoids allocation for the offset buffer.
+ */
+ @Test /* VarCharVector */
+ public void testSplitAndTransfer3() {
+ try (final VarCharVector targetVector = newVarCharVector("split-target", allocator);
+ final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ sourceVector.allocateNew(1024 * 10, 1024);
+
+ sourceVector.set(0, new byte[0]);
+ sourceVector.setNull(1);
+ sourceVector.set(2, STR1);
+ sourceVector.set(3, STR2);
+ sourceVector.set(4, STR3);
+ sourceVector.setValueCount(5);
+
+ final long allocatedMem = allocator.getAllocatedMemory();
+ final int validityRefCnt = sourceVector.getValidityBuffer().refCnt();
+ final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt();
+ final int dataRefCnt = sourceVector.getDataBuffer().refCnt();
+
+ sourceVector.splitAndTransferTo(2, 2, targetVector);
+ // because the offset starts at 0 since the first 2 values are empty/null, the allocation only consists in
+ // the size needed for the validity buffer
+ final long validitySize =
+ DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize(
+ BaseValueVector.getValidityBufferSizeFromCount(2));
+ assertEquals(allocatedMem + validitySize, allocator.getAllocatedMemory());
+ // The validity and offset buffers are sliced from a same buffer.See BaseFixedWidthVector#allocateBytes.
+ // Since values up to the startIndex are empty/null, the offset buffer doesn't need to be reallocated and
+ // therefore its refcnt is increased by 1.
+ assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt());
+ assertEquals(offsetRefCnt + 1, sourceVector.getOffsetBuffer().refCnt());
+ assertEquals(dataRefCnt + 1, sourceVector.getDataBuffer().refCnt());
+
+ assertArrayEquals(STR1, targetVector.get(0));
+ assertArrayEquals(STR2, targetVector.get(1));
+ }
+ }
+
+ /**
+ * ARROW-7831: ensures that data is transferred from one allocator to another in case of 0-index start special cases.
+ */
+ @Test /* VarCharVector */
+ public void testSplitAndTransfer4() {
+ try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", 256, 256);
+ final VarCharVector targetVector = newVarCharVector("split-target", targetAllocator)) {
+ try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", 256, 256);
+ final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) {
+ sourceVector.allocateNew(50, 3);
+
+ sourceVector.set(0, STR1);
+ sourceVector.set(1, STR2);
+ sourceVector.set(2, STR3);
+ sourceVector.setValueCount(3);
+
+ final long allocatedMem = allocator.getAllocatedMemory();
+ final int validityRefCnt = sourceVector.getValidityBuffer().refCnt();
+ final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt();
+ final int dataRefCnt = sourceVector.getDataBuffer().refCnt();
+
+ // split and transfer with slice starting at the beginning: this should not allocate anything new
+ sourceVector.splitAndTransferTo(0, 2, targetVector);
+ assertEquals(allocatedMem, allocator.getAllocatedMemory());
+ // Unlike testSplitAndTransfer1 where the buffers originated from the same allocator, the refcnts of each
+ // buffers for this test should be the same as what the source allocator ended up with.
+ assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt());
+ assertEquals(offsetRefCnt, sourceVector.getOffsetBuffer().refCnt());
+ assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt());
+ }
+ assertArrayEquals(STR1, targetVector.get(0));
+ assertArrayEquals(STR2, targetVector.get(1));
+ }
+ }
+
+ @Test /* VarCharVector */
+ public void testNullableVarType1() {
+
+ // Create a new value vector for 1024 integers.
+ try (final VarCharVector vector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(1024 * 10, 1024);
+
+ vector.set(0, STR1);
+ vector.set(1, STR2);
+ vector.set(2, STR3);
+ vector.setSafe(3, STR3, 1, STR3.length - 1);
+ vector.setSafe(4, STR3, 2, STR3.length - 2);
+ ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3);
+ vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1);
+ vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2);
+
+ // Set with convenience function
+ Text txt = new Text("foo");
+ vector.setSafe(7, txt);
+
+ // Check the sample strings.
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6));
+
+ // Check returning a Text object
+ assertEquals(txt, vector.getObject(7));
+
+ // Ensure null value throws.
+ boolean b = false;
+ assertNull(vector.get(8));
+ }
+ }
+
+ @Test /* VarBinaryVector */
+ public void testNullableVarType2() {
+
+ // Create a new value vector for 1024 integers.
+ try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(1024 * 10, 1024);
+
+ vector.set(0, STR1);
+ vector.set(1, STR2);
+ vector.set(2, STR3);
+ vector.setSafe(3, STR3, 1, STR3.length - 1);
+ vector.setSafe(4, STR3, 2, STR3.length - 2);
+ ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3);
+ vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1);
+ vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2);
+
+ // Check the sample strings.
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6));
+
+ // Ensure null value throws.
+ assertNull(vector.get(7));
+ }
+ }
+
+
+ /*
+ * generic tests
+ *
+ * -- lastSet() and setValueCount()
+ * -- fillEmpties()
+ * -- VectorLoader and VectorUnloader
+ * -- some realloc tests
+ *
+ * TODO:
+ *
+ * The realloc() related tests below should be moved up and we need to
+ * add realloc related tests (edge cases) for more vector types.
+ */
+
+ @Test /* Float8Vector */
+ public void testReallocAfterVectorTransfer1() {
+ try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ int initialCapacity = 4096;
+ boolean error = false;
+
+ /* use the default capacity; 4096*8 => 32KB */
+ vector.setInitialCapacity(initialCapacity);
+ vector.allocateNew();
+
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ double baseValue = 100.375;
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ /* the above setSafe calls should not have triggered a realloc as
+ * we are within the capacity. check the vector contents
+ */
+ assertEquals(initialCapacity, vector.getValueCapacity());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* this should trigger a realloc */
+ vector.setSafe(initialCapacity, baseValue + (double) initialCapacity);
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 2);
+ int capacityAfterRealloc1 = vector.getValueCapacity();
+
+ for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ for (int i = 0; i < capacityAfterRealloc1; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* this should trigger a realloc */
+ vector.setSafe(capacityAfterRealloc1, baseValue + (double) (capacityAfterRealloc1));
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 4);
+ int capacityAfterRealloc2 = vector.getValueCapacity();
+
+ for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ for (int i = 0; i < capacityAfterRealloc2; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* at this point we are working with a 128KB buffer data for this
+ * vector. now let's transfer this vector
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+
+ Float8Vector toVector = (Float8Vector) transferPair.getTo();
+
+ /* now let's realloc the toVector */
+ toVector.reAlloc();
+ assertTrue(toVector.getValueCapacity() >= initialCapacity * 8);
+
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ if (i < capacityAfterRealloc2) {
+ assertEquals(baseValue + (double) i, toVector.get(i), 0);
+ } else {
+ assertTrue(toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test /* Float8Vector */
+ public void testReallocAfterVectorTransfer2() {
+ try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ int initialCapacity = 4096;
+ boolean error = false;
+
+ vector.allocateNew(initialCapacity);
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ double baseValue = 100.375;
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ /* the above setSafe calls should not have triggered a realloc as
+ * we are within the capacity. check the vector contents
+ */
+ assertEquals(initialCapacity, vector.getValueCapacity());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* this should trigger a realloc */
+ vector.setSafe(initialCapacity, baseValue + (double) initialCapacity);
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 2);
+ int capacityAfterRealloc1 = vector.getValueCapacity();
+
+ for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ for (int i = 0; i < capacityAfterRealloc1; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* this should trigger a realloc */
+ vector.setSafe(capacityAfterRealloc1, baseValue + (double) (capacityAfterRealloc1));
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 4);
+ int capacityAfterRealloc2 = vector.getValueCapacity();
+
+ for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ for (int i = 0; i < capacityAfterRealloc2; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* at this point we are working with a 128KB buffer data for this
+ * vector. now let's transfer this vector
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+
+ Float8Vector toVector = (Float8Vector) transferPair.getTo();
+
+ /* check toVector contents before realloc */
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ assertFalse("unexpected null value at index: " + i, toVector.isNull(i));
+ double value = toVector.get(i);
+ assertEquals("unexpected value at index: " + i, baseValue + (double) i, value, 0);
+ }
+
+ /* now let's realloc the toVector and check contents again */
+ toVector.reAlloc();
+ assertTrue(toVector.getValueCapacity() >= initialCapacity * 8);
+
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ if (i < capacityAfterRealloc2) {
+ assertFalse("unexpected null value at index: " + i, toVector.isNull(i));
+ double value = toVector.get(i);
+ assertEquals("unexpected value at index: " + i, baseValue + (double) i, value, 0);
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test /* VarCharVector */
+ public void testReallocAfterVectorTransfer3() {
+ try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ /* 4096 values with 10 byte per record */
+ vector.allocateNew(4096 * 10, 4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertTrue(valueCapacity >= 4096);
+
+ /* populate the vector */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafe(valueCapacity, STR2, 0, STR2.length);
+ assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
+ while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
+ vector.reallocDataBuffer();
+ }
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafe(valueCapacity + 10, STR2, 0, STR2.length);
+ assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
+ while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
+ vector.reallocDataBuffer();
+ }
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* we are potentially working with 4x the size of vector buffer
+ * that we initially started with. Now let's transfer the vector.
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ VarCharVector toVector = (VarCharVector) transferPair.getTo();
+ valueCapacity = toVector.getValueCapacity();
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, toVector.get(i));
+ } else {
+ assertArrayEquals(STR2, toVector.get(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test /* IntVector */
+ public void testReallocAfterVectorTransfer4() {
+ try (final IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ /* 4096 values */
+ vector.allocateNew(4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertTrue(valueCapacity >= 4096);
+
+ /* populate the vector */
+ int baseValue = 1000;
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ vector.set(i, 1000 + i);
+ }
+ }
+
+ /* Check the vector output */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertEquals(1000 + i, vector.get(i));
+ } else {
+ assertTrue(vector.isNull(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafe(valueCapacity, 10000000);
+ assertTrue(vector.getValueCapacity() >= valueCapacity * 2);
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 0) {
+ vector.set(i, 1000 + i);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertEquals(1000 + i, vector.get(i));
+ } else {
+ assertTrue(vector.isNull(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafe(valueCapacity, 10000000);
+ assertTrue(vector.getValueCapacity() >= valueCapacity * 2);
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 0) {
+ vector.set(i, 1000 + i);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertEquals(1000 + i, vector.get(i));
+ } else {
+ assertTrue(vector.isNull(i));
+ }
+ }
+
+ /* we are potentially working with 4x the size of vector buffer
+ * that we initially started with. Now let's transfer the vector.
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ IntVector toVector = (IntVector) transferPair.getTo();
+ /* value capacity of source and target vectors should be same after
+ * the transfer.
+ */
+ assertEquals(valueCapacity, toVector.getValueCapacity());
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertEquals(1000 + i, toVector.get(i));
+ } else {
+ assertTrue(toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test
+ public void testReAllocFixedWidthVector() {
+ // Create a new value vector for 1024 integers
+ try (final Float4Vector vector = newVector(Float4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator)) {
+ vector.allocateNew(1024);
+
+ assertTrue(vector.getValueCapacity() >= 1024);
+ int initialCapacity = vector.getValueCapacity();
+
+ // Put values in indexes that fall within the initial allocation
+ vector.setSafe(0, 100.1f);
+ vector.setSafe(100, 102.3f);
+ vector.setSafe(1023, 104.5f);
+
+ // Now try to put values in space that falls beyond the initial allocation
+ vector.setSafe(2000, 105.5f);
+
+ // Check valueCapacity is more than initial allocation
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ assertEquals(100.1f, vector.get(0), 0);
+ assertEquals(102.3f, vector.get(100), 0);
+ assertEquals(104.5f, vector.get(1023), 0);
+ assertEquals(105.5f, vector.get(2000), 0);
+
+ // Set the valueCount to be more than valueCapacity of current allocation. This is possible for ValueVectors
+ // as we don't call setSafe for null values, but we do call setValueCount when all values are inserted into the
+ // vector
+ vector.setValueCount(vector.getValueCapacity() + 200);
+ }
+ }
+
+ @Test
+ public void testReAllocVariableWidthVector() {
+ try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+ vector.setInitialCapacity(4095);
+ vector.allocateNew();
+
+ int initialCapacity = vector.getValueCapacity();
+ assertTrue(initialCapacity >= 4095);
+
+ /* Put values in indexes that fall within the initial allocation */
+ vector.setSafe(0, STR1, 0, STR1.length);
+ vector.setSafe(initialCapacity - 1, STR2, 0, STR2.length);
+
+ /* the above set calls should NOT have triggered a realloc */
+ assertEquals(initialCapacity, vector.getValueCapacity());
+
+ /* Now try to put values in space that falls beyond the initial allocation */
+ vector.setSafe(initialCapacity + 200, STR3, 0, STR3.length);
+
+ /* Check valueCapacity is more than initial allocation */
+ assertTrue(initialCapacity * 2 <= vector.getValueCapacity());
+
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(initialCapacity - 1));
+ assertArrayEquals(STR3, vector.get(initialCapacity + 200));
+
+ // Set the valueCount to be more than valueCapacity of current allocation. This is possible for ValueVectors
+ // as we don't call setSafe for null values, but we do call setValueCount when the current batch is processed.
+ vector.setValueCount(vector.getValueCapacity() + 200);
+ }
+ }
+
+ @Test
+ public void testFillEmptiesNotOverfill() {
+ try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+ vector.setInitialCapacity(4095);
+ vector.allocateNew();
+
+ int initialCapacity = vector.getValueCapacity();
+ assertTrue(initialCapacity >= 4095);
+
+ vector.setSafe(4094, "hello".getBytes(), 0, 5);
+ /* the above set method should NOT have triggered a realloc */
+ assertEquals(initialCapacity, vector.getValueCapacity());
+
+ long bufSizeBefore = vector.getFieldBuffers().get(1).capacity();
+ vector.setValueCount(initialCapacity);
+ assertEquals(bufSizeBefore, vector.getFieldBuffers().get(1).capacity());
+ assertEquals(initialCapacity, vector.getValueCapacity());
+ }
+ }
+
+ @Test
+ public void testSetSafeWithArrowBufNoExcessAllocs() {
+ final int numValues = BaseFixedWidthVector.INITIAL_VALUE_ALLOCATION * 2;
+ final byte[] valueBytes = "hello world".getBytes();
+ final int valueBytesLength = valueBytes.length;
+ final int isSet = 1;
+
+ try (
+ final VarCharVector fromVector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH,
+ MinorType.VARCHAR, allocator);
+ final VarCharVector toVector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH,
+ MinorType.VARCHAR, allocator)) {
+ /*
+ * Populate the from vector with 'numValues' with byte-arrays, each of size 'valueBytesLength'.
+ */
+ fromVector.setInitialCapacity(numValues);
+ fromVector.allocateNew();
+ for (int i = 0; i < numValues; ++i) {
+ fromVector.setSafe(i, valueBytes, 0 /*start*/, valueBytesLength);
+ }
+ fromVector.setValueCount(numValues);
+ ArrowBuf fromDataBuffer = fromVector.getDataBuffer();
+ assertTrue(numValues * valueBytesLength <= fromDataBuffer.capacity());
+
+ /*
+ * Copy the entries one-by-one from 'fromVector' to 'toVector', but use the setSafe with
+ * ArrowBuf API (instead of setSafe with byte-array).
+ */
+ toVector.setInitialCapacity(numValues);
+ toVector.allocateNew();
+ for (int i = 0; i < numValues; i++) {
+ int start = fromVector.getStartOffset(i);
+ int end = fromVector.getStartOffset(i + 1);
+ toVector.setSafe(i, isSet, start, end, fromDataBuffer);
+ }
+
+ /*
+ * Since the 'fromVector' and 'toVector' have the same initial capacity, and were populated
+ * with the same varchar elements, the allocations and hence, the final capacity should be
+ * the same.
+ */
+ assertEquals(fromDataBuffer.capacity(), toVector.getDataBuffer().capacity());
+ }
+ }
+
+ @Test
+ public void testCopyFromWithNulls() {
+ try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);
+ final VarCharVector vector2 =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+
+ vector.setInitialCapacity(4095);
+ vector.allocateNew();
+ int capacity = vector.getValueCapacity();
+ assertTrue(capacity >= 4095);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ continue;
+ }
+ byte[] b = Integer.toString(i).getBytes();
+ vector.setSafe(i, b, 0, b.length);
+ }
+
+ /* NO reAlloc() should have happened in setSafe() */
+ assertEquals(capacity, vector.getValueCapacity());
+
+ vector.setValueCount(capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString());
+ }
+ }
+
+ vector2.setInitialCapacity(4095);
+ vector2.allocateNew();
+ int capacity2 = vector2.getValueCapacity();
+ assertEquals(capacity2, capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ vector2.copyFromSafe(i, i, vector);
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString());
+ }
+ }
+
+ /* NO reAlloc() should have happened in copyFrom */
+ assertEquals(capacity, vector2.getValueCapacity());
+
+ vector2.setValueCount(capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testCopyFromWithNulls1() {
+ try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);
+ final VarCharVector vector2 =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+
+ vector.setInitialCapacity(4095);
+ vector.allocateNew();
+ int capacity = vector.getValueCapacity();
+ assertTrue(capacity >= 4095);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ continue;
+ }
+ byte[] b = Integer.toString(i).getBytes();
+ vector.setSafe(i, b, 0, b.length);
+ }
+
+ /* NO reAlloc() should have happened in setSafe() */
+ assertEquals(capacity, vector.getValueCapacity());
+
+ vector.setValueCount(capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString());
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(1024 * 10, 1024);
+
+ int capacity2 = vector2.getValueCapacity();
+ assertTrue(capacity2 >= 1024);
+ assertTrue(capacity2 <= capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ vector2.copyFromSafe(i, i, vector);
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString());
+ }
+ }
+
+ /* 2 reAllocs should have happened in copyFromSafe() */
+ assertEquals(capacity, vector2.getValueCapacity());
+
+ vector2.setValueCount(capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSetLastSetUsage() {
+ try (final VarCharVector vector = new VarCharVector("myvector", allocator)) {
+ vector.allocateNew(1024 * 10, 1024);
+
+ setBytes(0, STR1, vector);
+ setBytes(1, STR2, vector);
+ setBytes(2, STR3, vector);
+ setBytes(3, STR4, vector);
+ setBytes(4, STR5, vector);
+ setBytes(5, STR6, vector);
+
+ /* Check current lastSet */
+ assertEquals(-1, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ /*
+ * If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt
+ * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays.
+ * Run the test by commenting out next line and we should see incorrect vector output.
+ */
+ vector.setLastSet(5);
+ vector.setValueCount(20);
+
+ /* Check current lastSet */
+ assertEquals(19, vector.getLastSet());
+
+ /* Check the vector output again */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+ assertEquals(0, vector.getValueLength(10));
+ assertEquals(0, vector.getValueLength(11));
+ assertEquals(0, vector.getValueLength(12));
+ assertEquals(0, vector.getValueLength(13));
+ assertEquals(0, vector.getValueLength(14));
+ assertEquals(0, vector.getValueLength(15));
+ assertEquals(0, vector.getValueLength(16));
+ assertEquals(0, vector.getValueLength(17));
+ assertEquals(0, vector.getValueLength(18));
+ assertEquals(0, vector.getValueLength(19));
+
+ /* Check offsets */
+ assertEquals(0, vector.offsetBuffer.getInt(0 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(6, vector.offsetBuffer.getInt(1 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(16, vector.offsetBuffer.getInt(2 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(21, vector.offsetBuffer.getInt(3 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(30, vector.offsetBuffer.getInt(4 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(34, vector.offsetBuffer.getInt(5 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(6 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(7 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(8 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(9 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(10 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(11 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(12 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(13 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(14 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(15 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(16 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(17 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(18 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(19 * BaseVariableWidthVector.OFFSET_WIDTH));
+
+ vector.set(19, STR6);
+ assertArrayEquals(STR6, vector.get(19));
+ assertEquals(40, vector.offsetBuffer.getInt(19 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(46, vector.offsetBuffer.getInt(20 * BaseVariableWidthVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testVectorLoadUnload() {
+
+ try (final VarCharVector vector1 = new VarCharVector("myvector", allocator)) {
+
+ setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6);
+
+ assertEquals(5, vector1.getLastSet());
+ vector1.setValueCount(15);
+ assertEquals(14, vector1.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector1.get(0));
+ assertArrayEquals(STR2, vector1.get(1));
+ assertArrayEquals(STR3, vector1.get(2));
+ assertArrayEquals(STR4, vector1.get(3));
+ assertArrayEquals(STR5, vector1.get(4));
+ assertArrayEquals(STR6, vector1.get(5));
+
+ Field field = vector1.getField();
+ String fieldName = field.getName();
+
+ List<Field> fields = new ArrayList<>();
+ List<FieldVector> fieldVectors = new ArrayList<>();
+
+ fields.add(field);
+ fieldVectors.add(vector1);
+
+ Schema schema = new Schema(fields);
+
+ VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount());
+ VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
+
+ try (
+ ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE);
+ VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator);
+ ) {
+
+ VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
+ vectorLoader.load(recordBatch);
+
+ VarCharVector vector2 = (VarCharVector) schemaRoot2.getVector(fieldName);
+ /*
+ * lastSet would have internally been set by VectorLoader.load() when it invokes
+ * loadFieldBuffers.
+ */
+ assertEquals(14, vector2.getLastSet());
+ vector2.setValueCount(25);
+ assertEquals(24, vector2.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector2.get(0));
+ assertArrayEquals(STR2, vector2.get(1));
+ assertArrayEquals(STR3, vector2.get(2));
+ assertArrayEquals(STR4, vector2.get(3));
+ assertArrayEquals(STR5, vector2.get(4));
+ assertArrayEquals(STR6, vector2.get(5));
+ }
+ }
+ }
+
+ @Test
+ public void testFillEmptiesUsage() {
+ try (final VarCharVector vector = new VarCharVector("myvector", allocator)) {
+
+ vector.allocateNew(1024 * 10, 1024);
+
+ setBytes(0, STR1, vector);
+ setBytes(1, STR2, vector);
+ setBytes(2, STR3, vector);
+ setBytes(3, STR4, vector);
+ setBytes(4, STR5, vector);
+ setBytes(5, STR6, vector);
+
+ /* Check current lastSet */
+ assertEquals(-1, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ vector.setLastSet(5);
+ /* fill empty byte arrays from index [6, 9] */
+ vector.fillEmpties(10);
+
+ /* Check current lastSet */
+ assertEquals(9, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+
+ setBytes(10, STR1, vector);
+ setBytes(11, STR2, vector);
+
+ vector.setLastSet(11);
+ /* fill empty byte arrays from index [12, 14] */
+ vector.setValueCount(15);
+
+ /* Check current lastSet */
+ assertEquals(14, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+ assertArrayEquals(STR1, vector.get(10));
+ assertArrayEquals(STR2, vector.get(11));
+ assertEquals(0, vector.getValueLength(12));
+ assertEquals(0, vector.getValueLength(13));
+ assertEquals(0, vector.getValueLength(14));
+
+ /* Check offsets */
+ assertEquals(0,
+ vector.offsetBuffer.getInt(0 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(6,
+ vector.offsetBuffer.getInt(1 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(16,
+ vector.offsetBuffer.getInt(2 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(21,
+ vector.offsetBuffer.getInt(3 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(30,
+ vector.offsetBuffer.getInt(4 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(34,
+ vector.offsetBuffer.getInt(5 * BaseVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(40,
+ vector.offsetBuffer.getInt(6 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getInt(7 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getInt(8 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getInt(9 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getInt(10 * BaseVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(46,
+ vector.offsetBuffer.getInt(11 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getInt(12 * BaseVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(56,
+ vector.offsetBuffer.getInt(13 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getInt(14 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getInt(15 * BaseVariableWidthVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test /* VarCharVector */
+ public void testGetBufferAddress1() {
+
+ try (final VarCharVector vector = new VarCharVector("myvector", allocator)) {
+
+ setVector(vector, STR1, STR2, STR3, STR4, STR5, STR6);
+ vector.setValueCount(15);
+
+ /* check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+ long bitAddress = vector.getValidityBufferAddress();
+ long offsetAddress = vector.getOffsetBufferAddress();
+ long dataAddress = vector.getDataBufferAddress();
+
+ assertEquals(3, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+ assertEquals(dataAddress, buffers.get(2).memoryAddress());
+ }
+ }
+
+ @Test /* IntVector */
+ public void testGetBufferAddress2() {
+ try (final IntVector vector = new IntVector("myvector", allocator)) {
+ boolean error = false;
+ vector.allocateNew(16);
+
+ /* populate the vector */
+ for (int i = 0; i < 16; i += 2) {
+ vector.set(i, i + 10);
+ }
+
+ /* check the vector output */
+ for (int i = 0; i < 16; i += 2) {
+ assertEquals(i + 10, vector.get(i));
+ }
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+ long bitAddress = vector.getValidityBufferAddress();
+ long dataAddress = vector.getDataBufferAddress();
+
+ try {
+ long offsetAddress = vector.getOffsetBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(2, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(dataAddress, buffers.get(1).memoryAddress());
+ }
+ }
+
+ @Test
+ public void testMultipleClose() {
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("vector_allocator", 0, Long.MAX_VALUE);
+ IntVector vector = newVector(IntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, vectorAllocator);
+ vector.close();
+ vectorAllocator.close();
+ vector.close();
+ vectorAllocator.close();
+ }
+
+ /* this method is used by the tests to bypass the vector set methods that manipulate
+ * lastSet. The method is to test the lastSet property and that's why we load the vector
+ * in a way that lastSet is not set automatically.
+ */
+ public static void setBytes(int index, byte[] bytes, VarCharVector vector) {
+ final int currentOffset = vector.offsetBuffer.getInt(index * BaseVariableWidthVector.OFFSET_WIDTH);
+
+ BitVectorHelper.setBit(vector.validityBuffer, index);
+ vector.offsetBuffer.setInt((index + 1) * BaseVariableWidthVector.OFFSET_WIDTH, currentOffset + bytes.length);
+ vector.valueBuffer.setBytes(currentOffset, bytes, 0, bytes.length);
+ }
+
+ @Test /* VarCharVector */
+ public void testSetInitialCapacity() {
+ try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ /* use the default 8 data bytes on average per element */
+ int defaultCapacity = BaseValueVector.INITIAL_VALUE_ALLOCATION - 1;
+ vector.setInitialCapacity(defaultCapacity);
+ vector.allocateNew();
+ assertEquals(defaultCapacity, vector.getValueCapacity());
+ assertEquals(CommonUtil.nextPowerOfTwo(defaultCapacity * 8), vector.getDataBuffer().capacity());
+
+ vector.setInitialCapacity(defaultCapacity, 1);
+ vector.allocateNew();
+ assertEquals(defaultCapacity, vector.getValueCapacity());
+ assertEquals(CommonUtil.nextPowerOfTwo(defaultCapacity), vector.getDataBuffer().capacity());
+
+ vector.setInitialCapacity(defaultCapacity, 0.1);
+ vector.allocateNew();
+ assertEquals(defaultCapacity, vector.getValueCapacity());
+ assertEquals(CommonUtil.nextPowerOfTwo((int) (defaultCapacity * 0.1)), vector.getDataBuffer().capacity());
+
+ vector.setInitialCapacity(defaultCapacity, 0.01);
+ vector.allocateNew();
+ assertEquals(defaultCapacity, vector.getValueCapacity());
+ assertEquals(CommonUtil.nextPowerOfTwo((int) (defaultCapacity * 0.01)), vector.getDataBuffer().capacity());
+
+ vector.setInitialCapacity(5, 0.01);
+ vector.allocateNew();
+ assertEquals(5, vector.getValueCapacity());
+ assertEquals(2, vector.getDataBuffer().capacity());
+ }
+ }
+
+ @Test
+ public void testDefaultAllocNewAll() {
+ int defaultCapacity = BaseValueVector.INITIAL_VALUE_ALLOCATION;
+ int expectedSize;
+ long beforeSize;
+ try (BufferAllocator childAllocator = allocator.newChildAllocator("defaultAllocs", 0, Long.MAX_VALUE);
+ final IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, childAllocator);
+ final BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, childAllocator);
+ final BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, childAllocator);
+ final DecimalVector decimalVector = new DecimalVector(EMPTY_SCHEMA_PATH, childAllocator, 38, 6);
+ final VarCharVector varCharVector = new VarCharVector(EMPTY_SCHEMA_PATH, childAllocator)) {
+
+ // verify that the wastage is within bounds for IntVector.
+ beforeSize = childAllocator.getAllocatedMemory();
+ intVector.allocateNew();
+ assertTrue(intVector.getValueCapacity() >= defaultCapacity);
+ expectedSize = (defaultCapacity * IntVector.TYPE_WIDTH) +
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity);
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ // verify that the wastage is within bounds for BigIntVector.
+ beforeSize = childAllocator.getAllocatedMemory();
+ bigIntVector.allocateNew();
+ assertTrue(bigIntVector.getValueCapacity() >= defaultCapacity);
+ expectedSize = (defaultCapacity * bigIntVector.TYPE_WIDTH) +
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity);
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ // verify that the wastage is within bounds for DecimalVector.
+ beforeSize = childAllocator.getAllocatedMemory();
+ decimalVector.allocateNew();
+ assertTrue(decimalVector.getValueCapacity() >= defaultCapacity);
+ expectedSize = (defaultCapacity * decimalVector.TYPE_WIDTH) +
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity);
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ // verify that the wastage is within bounds for VarCharVector.
+ // var char vector have an offsets array that is 1 less than defaultCapacity
+ beforeSize = childAllocator.getAllocatedMemory();
+ varCharVector.allocateNew();
+ assertTrue(varCharVector.getValueCapacity() >= defaultCapacity - 1);
+ expectedSize = (defaultCapacity * VarCharVector.OFFSET_WIDTH) +
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) +
+ defaultCapacity * 8;
+ // wastage should be less than 5%.
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ // verify that the wastage is within bounds for BitVector.
+ beforeSize = childAllocator.getAllocatedMemory();
+ bitVector.allocateNew();
+ assertTrue(bitVector.getValueCapacity() >= defaultCapacity);
+ expectedSize = BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) * 2;
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ }
+ }
+
+ @Test
+ public void testSetNullableVarCharHolder() {
+ try (VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.allocateNew(100, 10);
+
+ NullableVarCharHolder nullHolder = new NullableVarCharHolder();
+ nullHolder.isSet = 0;
+
+ NullableVarCharHolder stringHolder = new NullableVarCharHolder();
+ stringHolder.isSet = 1;
+
+ String str = "hello";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ stringHolder.start = 0;
+ stringHolder.end = str.length();
+ stringHolder.buffer = buf;
+
+ vector.set(0, nullHolder);
+ vector.set(1, stringHolder);
+
+ // verify results
+ assertTrue(vector.isNull(0));
+ assertEquals(str, new String(vector.get(1)));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableVarCharHolderSafe() {
+ try (VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ NullableVarCharHolder nullHolder = new NullableVarCharHolder();
+ nullHolder.isSet = 0;
+
+ NullableVarCharHolder stringHolder = new NullableVarCharHolder();
+ stringHolder.isSet = 1;
+
+ String str = "hello world";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ stringHolder.start = 0;
+ stringHolder.end = str.length();
+ stringHolder.buffer = buf;
+
+ vector.setSafe(0, stringHolder);
+ vector.setSafe(1, nullHolder);
+
+ // verify results
+ assertEquals(str, new String(vector.get(0)));
+ assertTrue(vector.isNull(1));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableVarBinaryHolder() {
+ try (VarBinaryVector vector = new VarBinaryVector("", allocator)) {
+ vector.allocateNew(100, 10);
+
+ NullableVarBinaryHolder nullHolder = new NullableVarBinaryHolder();
+ nullHolder.isSet = 0;
+
+ NullableVarBinaryHolder binHolder = new NullableVarBinaryHolder();
+ binHolder.isSet = 1;
+
+ String str = "hello";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
+
+ vector.set(0, nullHolder);
+ vector.set(1, binHolder);
+
+ // verify results
+ assertTrue(vector.isNull(0));
+ assertEquals(str, new String(vector.get(1)));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableVarBinaryHolderSafe() {
+ try (VarBinaryVector vector = new VarBinaryVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ NullableVarBinaryHolder nullHolder = new NullableVarBinaryHolder();
+ nullHolder.isSet = 0;
+
+ NullableVarBinaryHolder binHolder = new NullableVarBinaryHolder();
+ binHolder.isSet = 1;
+
+ String str = "hello world";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
+
+ vector.setSafe(0, binHolder);
+ vector.setSafe(1, nullHolder);
+
+ // verify results
+ assertEquals(str, new String(vector.get(0)));
+ assertTrue(vector.isNull(1));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testGetPointerFixedWidth() {
+ final int vectorLength = 100;
+ try (IntVector vec1 = new IntVector("vec1", allocator);
+ IntVector vec2 = new IntVector("vec2", allocator)) {
+ vec1.allocateNew(vectorLength);
+ vec2.allocateNew(vectorLength);
+
+ for (int i = 0; i < vectorLength; i++) {
+ if (i % 10 == 0) {
+ vec1.setNull(i);
+ vec2.setNull(i);
+ } else {
+ vec1.set(i, i * 1234);
+ vec2.set(i, i * 1234);
+ }
+ }
+
+ ArrowBufPointer ptr1 = new ArrowBufPointer();
+ ArrowBufPointer ptr2 = new ArrowBufPointer();
+
+ for (int i = 0; i < vectorLength; i++) {
+ vec1.getDataPointer(i, ptr1);
+ vec2.getDataPointer(i, ptr2);
+
+ if (i % 10 == 0) {
+ assertNull(ptr1.getBuf());
+ assertNull(ptr2.getBuf());
+ }
+
+ assertTrue(ptr1.equals(ptr2));
+ assertTrue(ptr2.equals(ptr2));
+ }
+ }
+ }
+
+ @Test
+ public void testGetPointerVariableWidth() {
+ final String[] sampleData = new String[]{
+ "abc", "123", "def", null, "hello", "aaaaa", "world", "2019", null, "0717"};
+
+ try (VarCharVector vec1 = new VarCharVector("vec1", allocator);
+ VarCharVector vec2 = new VarCharVector("vec2", allocator)) {
+ vec1.allocateNew(sampleData.length * 10, sampleData.length);
+ vec2.allocateNew(sampleData.length * 10, sampleData.length);
+
+ for (int i = 0; i < sampleData.length; i++) {
+ String str = sampleData[i];
+ if (str != null) {
+ vec1.set(i, sampleData[i].getBytes());
+ vec2.set(i, sampleData[i].getBytes());
+ } else {
+ vec1.setNull(i);
+ vec2.setNull(i);
+ }
+ }
+
+ ArrowBufPointer ptr1 = new ArrowBufPointer();
+ ArrowBufPointer ptr2 = new ArrowBufPointer();
+
+ for (int i = 0; i < sampleData.length; i++) {
+ vec1.getDataPointer(i, ptr1);
+ vec2.getDataPointer(i, ptr2);
+
+ assertTrue(ptr1.equals(ptr2));
+ assertTrue(ptr2.equals(ptr2));
+ }
+ }
+ }
+
+ @Test
+ public void testGetNullFromVariableWidthVector() {
+ try (final VarCharVector varCharVector = new VarCharVector("varcharvec", allocator);
+ final VarBinaryVector varBinaryVector = new VarBinaryVector("varbinary", allocator)) {
+ varCharVector.allocateNew(10, 1);
+ varBinaryVector.allocateNew(10, 1);
+
+ varCharVector.setNull(0);
+ varBinaryVector.setNull(0);
+
+ assertNull(varCharVector.get(0));
+ assertNull(varBinaryVector.get(0));
+ }
+ }
+
+ @Test
+ public void testZeroVectorEquals() {
+ try (final ZeroVector vector1 = new ZeroVector("vector");
+ final ZeroVector vector2 = new ZeroVector("vector")) {
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testZeroVectorNotEquals() {
+ try (final IntVector intVector = new IntVector("int", allocator);
+ final ZeroVector zeroVector = new ZeroVector("zero");
+ final ZeroVector zeroVector1 = new ZeroVector("zero1")) {
+
+ VectorEqualsVisitor zeroVisitor = new VectorEqualsVisitor();
+ assertFalse(zeroVisitor.vectorEquals(intVector, zeroVector));
+
+ VectorEqualsVisitor intVisitor = new VectorEqualsVisitor();
+ assertFalse(intVisitor.vectorEquals(zeroVector, intVector));
+
+ VectorEqualsVisitor twoZeroVisitor = new VectorEqualsVisitor();
+ // they are not equal because of distinct names
+ assertFalse(twoZeroVisitor.vectorEquals(zeroVector, zeroVector1));
+ }
+ }
+
+ @Test
+ public void testIntVectorEqualsWithNull() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2);
+ setVector(vector2, 1, null);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testIntVectorEquals() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2, 3);
+ setVector(vector2, 1, 2, null);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ vector2.setValueCount(3);
+ vector2.setSafe(2, 2);
+ assertFalse(vector1.equals(vector2));
+
+ vector2.setSafe(2, 3);
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testDecimalVectorEquals() {
+ try (final DecimalVector vector1 = new DecimalVector("decimal", allocator, 3, 3);
+ final DecimalVector vector2 = new DecimalVector("decimal", allocator, 3, 3);
+ final DecimalVector vector3 = new DecimalVector("decimal", allocator, 3, 2)) {
+
+ setVector(vector1, 100L, 200L);
+ setVector(vector2, 100L, 200L);
+ setVector(vector3, 100L, 200L);
+
+ VectorEqualsVisitor visitor1 = new VectorEqualsVisitor();
+ VectorEqualsVisitor visitor2 = new VectorEqualsVisitor();
+
+ assertTrue(visitor1.vectorEquals(vector1, vector2));
+ assertFalse(visitor2.vectorEquals(vector1, vector3));
+ }
+ }
+
+ @Test
+ public void testVarcharVectorEqualsWithNull() {
+ try (final VarCharVector vector1 = new VarCharVector("varchar", allocator);
+ final VarCharVector vector2 = new VarCharVector("varchar", allocator)) {
+
+ setVector(vector1, STR1, STR2);
+ setVector(vector2, STR1, null);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testVarcharVectorEquals() {
+ try (final VarCharVector vector1 = new VarCharVector("varchar", allocator);
+ final VarCharVector vector2 = new VarCharVector("varchar", allocator)) {
+
+ setVector(vector1, STR1, STR2, STR3);
+ setVector(vector2, STR1, STR2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ vector2.setSafe(2, STR3, 0, STR3.length);
+ vector2.setValueCount(3);
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testVarBinaryVectorEquals() {
+ try (final VarBinaryVector vector1 = new VarBinaryVector("binary", allocator);
+ final VarBinaryVector vector2 = new VarBinaryVector("binary", allocator)) {
+
+ setVector(vector1, STR1, STR2, STR3);
+ setVector(vector2, STR1, STR2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ vector2.setSafe(2, STR3, 0, STR3.length);
+ vector2.setValueCount(3);
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testListVectorEqualsWithNull() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ UnionListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeListVector(writer1, new int[] {1, 2});
+ writeListVector(writer1, new int[] {3, 4});
+ writeListVector(writer1, new int[] {});
+ writer1.setValueCount(3);
+
+ UnionListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeListVector(writer2, new int[] {1, 2});
+ writeListVector(writer2, new int[] {3, 4});
+ writer2.setValueCount(3);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testListVectorEquals() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ UnionListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeListVector(writer1, new int[] {1, 2});
+ writeListVector(writer1, new int[] {3, 4});
+ writeListVector(writer1, new int[] {5, 6});
+ writer1.setValueCount(3);
+
+ UnionListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeListVector(writer2, new int[] {1, 2});
+ writeListVector(writer2, new int[] {3, 4});
+ writer2.setValueCount(2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ writeListVector(writer2, new int[] {5, 6});
+ writer2.setValueCount(3);
+
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testStructVectorEqualsWithNull() {
+
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writeStructVector(writer1, 3, 30L);
+ writer1.setValueCount(3);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 1, 10L);
+ writeStructVector(writer2, 3, 30L);
+ writer2.setValueCount(3);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testStructVectorEquals() {
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writeStructVector(writer1, 3, 30L);
+ writer1.setValueCount(3);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 1, 10L);
+ writeStructVector(writer2, 2, 20L);
+ writer2.setValueCount(2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ writeStructVector(writer2, 3, 30L);
+ writer2.setValueCount(3);
+
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testStructVectorEqualsWithDiffChild() {
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f10", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writer1.setValueCount(2);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 1, 10L);
+ writeStructVector(writer2, 2, 20L);
+ writer2.setValueCount(2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testUnionVectorEquals() {
+ try (final UnionVector vector1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector vector2 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 10;
+ uInt4Holder.isSet = 1;
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ uInt4Holder.value = 20;
+ uInt4Holder.isSet = 1;
+
+ vector1.setType(0, Types.MinorType.UINT4);
+ vector1.setSafe(0, uInt4Holder);
+
+ vector1.setType(1, Types.MinorType.INT);
+ vector1.setSafe(1, intHolder);
+ vector1.setValueCount(2);
+
+ vector2.setType(0, Types.MinorType.UINT4);
+ vector2.setSafe(0, uInt4Holder);
+
+ vector2.setType(1, Types.MinorType.INT);
+ vector2.setSafe(1, intHolder);
+ vector2.setValueCount(2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testEqualsWithIndexOutOfRange() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2);
+ setVector(vector2, 1, 2);
+
+ assertTrue(new RangeEqualsVisitor(vector1, vector2).rangeEquals(new Range(2, 3, 1)));
+ }
+ }
+
+ @Test
+ public void testFixedWidthVectorNullHashCode() {
+ try (IntVector intVec = new IntVector("int vector", allocator)) {
+ intVec.allocateNew(1);
+ intVec.setValueCount(1);
+
+ intVec.set(0, 100);
+ intVec.setNull(0);
+
+ assertEquals(0, intVec.hashCode(0));
+ }
+ }
+
+ @Test
+ public void testVariableWidthVectorNullHashCode() {
+ try (VarCharVector varChVec = new VarCharVector("var char vector", allocator)) {
+ varChVec.allocateNew(100, 1);
+ varChVec.setValueCount(1);
+
+ varChVec.set(0, "abc".getBytes());
+ varChVec.setNull(0);
+
+ assertEquals(0, varChVec.hashCode(0));
+ }
+ }
+
+ @Test
+ public void testUnionNullHashCode() {
+ try (UnionVector srcVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ srcVector.allocateNew();
+
+ final NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 0;
+
+ // write some data
+ srcVector.setType(0, MinorType.INT);
+ srcVector.setSafe(0, holder);
+
+ assertEquals(0, srcVector.hashCode(0));
+ }
+ }
+
+ @Test
+ public void testToString() {
+ try (final IntVector intVector = new IntVector("intVector", allocator);
+ final ListVector listVector = ListVector.empty("listVector", allocator);
+ final StructVector structVector = StructVector.empty("structVector", allocator)) {
+
+ // validate intVector toString
+ assertEquals("[]", intVector.toString());
+ intVector.setValueCount(3);
+ intVector.setSafe(0, 1);
+ intVector.setSafe(1, 2);
+ intVector.setSafe(2, 3);
+ assertEquals("[1, 2, 3]", intVector.toString());
+
+ // validate intVector with plenty values
+ intVector.setValueCount(100);
+ for (int i = 0; i < 100; i++) {
+ intVector.setSafe(i, i);
+ }
+ assertEquals("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]",
+ intVector.toString());
+
+ // validate listVector toString
+ listVector.allocateNewSafe();
+ listVector.initializeChildrenFromFields(
+ Collections.singletonList(Field.nullable("child", ArrowType.Utf8.INSTANCE)));
+ VarCharVector dataVector = (VarCharVector) listVector.getDataVector();
+
+ listVector.startNewValue(0);
+ dataVector.setSafe(0, "aaa".getBytes(StandardCharsets.UTF_8));
+ dataVector.setSafe(1, "bbb".getBytes(StandardCharsets.UTF_8));
+ listVector.endValue(0, 2);
+
+ listVector.startNewValue(1);
+ dataVector.setSafe(2, "ccc".getBytes(StandardCharsets.UTF_8));
+ dataVector.setSafe(3, "ddd".getBytes(StandardCharsets.UTF_8));
+ listVector.endValue(1, 2);
+ listVector.setValueCount(2);
+
+ assertEquals("[[\"aaa\",\"bbb\"], [\"ccc\",\"ddd\"]]", listVector.toString());
+
+ // validate structVector toString
+ structVector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ structVector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter structWriter = structVector.getWriter();
+ structWriter.allocate();
+
+ writeStructVector(structWriter, 1, 10L);
+ writeStructVector(structWriter, 2, 20L);
+ structWriter.setValueCount(2);
+
+ assertEquals("[{\"f0\":1,\"f1\":10}, {\"f0\":2,\"f1\":20}]", structVector.toString());
+ }
+ }
+
+ @Test
+ public void testUInt1VectorToString() {
+ try (final UInt1Vector uInt1Vector = new UInt1Vector("uInt1Vector", allocator)) {
+ setVector(uInt1Vector, (byte) 0xff);
+ assertEquals("[255]", uInt1Vector.toString());
+ }
+ }
+
+ @Test
+ public void testUInt2VectorToString() {
+ try (final UInt2Vector uInt2Vector = new UInt2Vector("uInt2Vector", allocator)) {
+ setVector(uInt2Vector, (char) 0xffff);
+ assertEquals("[65535]", uInt2Vector.toString());
+ }
+ }
+
+ @Test
+ public void testUInt4VectorToString() {
+ try (final UInt4Vector uInt4Vector = new UInt4Vector("uInt4Vector", allocator)) {
+ setVector(uInt4Vector, 0xffffffff);
+ assertEquals("[4294967295]", uInt4Vector.toString());
+ }
+ }
+
+ @Test
+ public void testUInt8VectorToString() {
+ try (final UInt8Vector uInt8Vector = new UInt8Vector("uInt8Vector", allocator)) {
+ setVector(uInt8Vector, 0xffffffffffffffffL);
+ assertEquals("[18446744073709551615]", uInt8Vector.toString());
+ }
+ }
+
+ @Test
+ public void testUnloadVariableWidthVector() {
+ try (final VarCharVector varCharVector = new VarCharVector("var char", allocator)) {
+ varCharVector.allocateNew(5, 2);
+ varCharVector.setValueCount(2);
+
+ varCharVector.set(0, "abcd".getBytes());
+
+ List<ArrowBuf> bufs = varCharVector.getFieldBuffers();
+ assertEquals(3, bufs.size());
+
+ ArrowBuf offsetBuf = bufs.get(1);
+ ArrowBuf dataBuf = bufs.get(2);
+
+ assertEquals(12, offsetBuf.writerIndex());
+ assertEquals(4, offsetBuf.getInt(4));
+ assertEquals(4, offsetBuf.getInt(8));
+
+ assertEquals(4, dataBuf.writerIndex());
+ }
+ }
+
+ private void writeStructVector(NullableStructWriter writer, int value1, long value2) {
+ writer.start();
+ writer.integer("f0").writeInt(value1);
+ writer.bigInt("f1").writeBigInt(value2);
+ writer.end();
+ }
+
+ private void writeListVector(UnionListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+
+ @Test
+ public void testVariableVectorGetEndOffset() {
+ try (final VarCharVector vector1 = new VarCharVector("v1", allocator);
+ final VarBinaryVector vector2 = new VarBinaryVector("v2", allocator)) {
+
+ setVector(vector1, STR1, null, STR2);
+ setVector(vector2, STR1, STR2, STR3);
+
+ assertEquals(0, vector1.getStartOffset(0));
+ assertEquals(STR1.length, vector1.getEndOffset(0));
+ assertEquals(STR1.length, vector1.getStartOffset(1));
+ assertEquals(STR1.length, vector1.getEndOffset(1));
+ assertEquals(STR1.length, vector1.getStartOffset(2));
+ assertEquals(STR1.length + STR2.length, vector1.getEndOffset(2));
+
+ assertEquals(0, vector2.getStartOffset(0));
+ assertEquals(STR1.length, vector2.getEndOffset(0));
+ assertEquals(STR1.length, vector2.getStartOffset(1));
+ assertEquals(STR1.length + STR2.length, vector2.getEndOffset(1));
+ assertEquals(STR1.length + STR2.length, vector2.getStartOffset(2));
+ assertEquals(STR1.length + STR2.length + STR3.length, vector2.getEndOffset(2));
+ }
+ }
+
+ @Test
+ public void testEmptyBufBehavior() {
+ final int valueCount = 10;
+
+ try (final IntVector vector = new IntVector("v", allocator)) {
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getDataBuffer().capacity());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+
+ vector.allocateNew(valueCount);
+ assertEquals(2, vector.getDataBuffer().refCnt());
+ assertEquals(2, vector.getValidityBuffer().refCnt());
+ assertEquals(56, vector.getDataBuffer().capacity());
+ assertEquals(8, vector.getValidityBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getDataBuffer().capacity());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ }
+
+ try (final VarCharVector vector = new VarCharVector("v", allocator)) {
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getDataBuffer().capacity());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+
+ vector.allocateNew(valueCount);
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(2, vector.getValidityBuffer().refCnt());
+ assertEquals(2, vector.getOffsetBuffer().refCnt());
+ assertEquals(32768, vector.getDataBuffer().capacity());
+ assertEquals(8, vector.getValidityBuffer().capacity());
+ assertEquals(56, vector.getOffsetBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getDataBuffer().capacity());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+ }
+
+ try (final ListVector vector = ListVector.empty("v", allocator)) {
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+
+ vector.setValueCount(valueCount);
+ vector.allocateNewSafe();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(512, vector.getValidityBuffer().capacity());
+ assertEquals(16384, vector.getOffsetBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+ }
+
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("v", 2, allocator)) {
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+
+ vector.setValueCount(10);
+ vector.allocateNewSafe();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(512, vector.getValidityBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ }
+
+ try (final StructVector vector = StructVector.empty("v", allocator)) {
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+
+ vector.setValueCount(valueCount);
+ vector.allocateNewSafe();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(512, vector.getValidityBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ }
+
+ try (final UnionVector vector = UnionVector.empty("v", allocator)) {
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(0, vector.getTypeBuffer().capacity());
+
+ vector.setValueCount(10);
+ vector.allocateNewSafe();
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(4096, vector.getTypeBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(0, vector.getTypeBuffer().capacity());
+ }
+
+ try (final DenseUnionVector vector = DenseUnionVector.empty("v", allocator)) {
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getTypeBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+
+ vector.setValueCount(valueCount);
+ vector.allocateNew();
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(4096, vector.getTypeBuffer().capacity());
+ assertEquals(16384, vector.getOffsetBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getTypeBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+ }
+ }
+
+ @Test
+ public void testSetGetUInt1() {
+ try (UInt1Vector vector = new UInt1Vector("vector", allocator)) {
+ vector.allocateNew(2);
+
+ vector.setWithPossibleTruncate(0, UInt1Vector.MAX_UINT1);
+ vector.setUnsafeWithPossibleTruncate(1, UInt1Vector.MAX_UINT1);
+ vector.setValueCount(2);
+
+ assertEquals(UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK, vector.getValueAsLong(0));
+ assertEquals(UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK, vector.getValueAsLong(1));
+ }
+ }
+
+ @Test
+ public void testSetGetUInt2() {
+ try (UInt2Vector vector = new UInt2Vector("vector", allocator)) {
+ vector.allocateNew(2);
+
+ vector.setWithPossibleTruncate(0, UInt2Vector.MAX_UINT2);
+ vector.setUnsafeWithPossibleTruncate(1, UInt2Vector.MAX_UINT2);
+ vector.setValueCount(2);
+
+ assertEquals(UInt2Vector.MAX_UINT2, vector.getValueAsLong(0));
+ assertEquals(UInt2Vector.MAX_UINT2, vector.getValueAsLong(1));
+ }
+ }
+
+ @Test
+ public void testSetGetUInt4() {
+ try (UInt4Vector vector = new UInt4Vector("vector", allocator)) {
+ vector.allocateNew(2);
+
+ vector.setWithPossibleTruncate(0, UInt4Vector.MAX_UINT4);
+ vector.setUnsafeWithPossibleTruncate(1, UInt4Vector.MAX_UINT4);
+ vector.setValueCount(2);
+
+ long expected = UInt4Vector.MAX_UINT4 & UInt4Vector.PROMOTION_MASK;
+ assertEquals(expected, vector.getValueAsLong(0));
+ assertEquals(expected, vector.getValueAsLong(1));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java
new file mode 100644
index 000000000..a9b155499
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVarCharListVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testVarCharListWithNulls() {
+ byte[] bytes = "a".getBytes();
+ try (ListVector vector = new ListVector("VarList", allocator, FieldType.nullable(Types
+ .MinorType.VARCHAR.getType()), null);
+ ArrowBuf tempBuf = allocator.buffer(bytes.length)) {
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ // populate input vector with the following records
+ // ["a"]
+ // null
+ // ["b"]
+ writer.setPosition(0); // optional
+ writer.startList();
+ tempBuf.setBytes(0, bytes);
+ writer.writeVarChar(0, bytes.length, tempBuf);
+ writer.endList();
+
+ writer.setPosition(2);
+ writer.startList();
+ bytes = "b".getBytes();
+ tempBuf.setBytes(0, bytes);
+ writer.writeVarChar(0, bytes.length, tempBuf);
+ writer.endList();
+
+ writer.setValueCount(2);
+
+ Assert.assertTrue(vector.getValueCount() == 2);
+ Assert.assertTrue(vector.getDataVector().getValueCount() == 2);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
new file mode 100644
index 000000000..dfc75ec8e
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.apache.arrow.memory.AllocationListener;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
+import org.apache.arrow.memory.rounding.RoundingPolicy;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+import org.apache.arrow.vector.types.pojo.ArrowType.Duration;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVectorAlloc {
+ private BufferAllocator rootAllocator;
+
+ private BufferAllocator policyAllocator;
+
+ @Before
+ public void init() {
+ rootAllocator = new RootAllocator(Long.MAX_VALUE);
+ policyAllocator =
+ new RootAllocator(AllocationListener.NOOP, Integer.MAX_VALUE, new CustomPolicy());
+ }
+
+ @After
+ public void terminate() throws Exception {
+ rootAllocator.close();
+ policyAllocator.close();
+ }
+
+ private static Field field(String name, ArrowType type) {
+ return new Field(name, new FieldType(true, type, null), Collections.emptyList());
+ }
+
+ @Test
+ public void testVectorAllocWithField() {
+ Schema schema = new Schema(Arrays.asList(
+ field("TINYINT", MinorType.TINYINT.getType()),
+ field("SMALLINT", MinorType.SMALLINT.getType()),
+ field("INT", MinorType.INT.getType()),
+ field("BIGINT", MinorType.BIGINT.getType()),
+ field("UINT1", MinorType.UINT1.getType()),
+ field("UINT2", MinorType.UINT2.getType()),
+ field("UINT4", MinorType.UINT4.getType()),
+ field("UINT8", MinorType.UINT8.getType()),
+ field("FLOAT4", MinorType.FLOAT4.getType()),
+ field("FLOAT8", MinorType.FLOAT8.getType()),
+ field("UTF8", MinorType.VARCHAR.getType()),
+ field("VARBINARY", MinorType.VARBINARY.getType()),
+ field("BIT", MinorType.BIT.getType()),
+ field("DECIMAL", new Decimal(38, 5, 128)),
+ field("FIXEDSIZEBINARY", new FixedSizeBinary(50)),
+ field("DATEDAY", MinorType.DATEDAY.getType()),
+ field("DATEMILLI", MinorType.DATEMILLI.getType()),
+ field("TIMESEC", MinorType.TIMESEC.getType()),
+ field("TIMEMILLI", MinorType.TIMEMILLI.getType()),
+ field("TIMEMICRO", MinorType.TIMEMICRO.getType()),
+ field("TIMENANO", MinorType.TIMENANO.getType()),
+ field("TIMESTAMPSEC", MinorType.TIMESTAMPSEC.getType()),
+ field("TIMESTAMPMILLI", MinorType.TIMESTAMPMILLI.getType()),
+ field("TIMESTAMPMICRO", MinorType.TIMESTAMPMICRO.getType()),
+ field("TIMESTAMPNANO", MinorType.TIMESTAMPNANO.getType()),
+ field("TIMESTAMPSECTZ", new Timestamp(TimeUnit.SECOND, "PST")),
+ field("TIMESTAMPMILLITZ", new Timestamp(TimeUnit.MILLISECOND, "PST")),
+ field("TIMESTAMPMICROTZ", new Timestamp(TimeUnit.MICROSECOND, "PST")),
+ field("TIMESTAMPNANOTZ", new Timestamp(TimeUnit.NANOSECOND, "PST")),
+ field("INTERVALDAY", MinorType.INTERVALDAY.getType()),
+ field("INTERVALYEAR", MinorType.INTERVALYEAR.getType()),
+ field("DURATION", new Duration(TimeUnit.MILLISECOND))
+ ));
+
+ try (BufferAllocator allocator = rootAllocator.newChildAllocator("child", 0, Long.MAX_VALUE)) {
+ for (Field field : schema.getFields()) {
+ try (FieldVector vector = field.createVector(allocator)) {
+ assertEquals(vector.getMinorType(),
+ Types.getMinorTypeForArrowType(field.getFieldType().getType()));
+ vector.allocateNew();
+ }
+ }
+ }
+ }
+
+ private static final int CUSTOM_SEGMENT_SIZE = 200;
+
+ /**
+ * A custom rounding policy that rounds the size to
+ * the next multiple of 200.
+ */
+ private static class CustomPolicy implements RoundingPolicy {
+
+ @Override
+ public long getRoundedSize(long requestSize) {
+ return (requestSize + CUSTOM_SEGMENT_SIZE - 1) / CUSTOM_SEGMENT_SIZE * CUSTOM_SEGMENT_SIZE;
+ }
+ }
+
+ @Test
+ public void testFixedWidthVectorAllocation() {
+ try (IntVector vec1 = new IntVector("vec", policyAllocator);
+ IntVector vec2 = new IntVector("vec", rootAllocator)) {
+ assertTrue(vec1.getAllocator().getRoundingPolicy() instanceof CustomPolicy);
+ vec1.allocateNew(50);
+ long totalCapacity = vec1.getValidityBuffer().capacity() + vec1.getDataBuffer().capacity();
+
+ // the total capacity must be a multiple of the segment size
+ assertTrue(totalCapacity % CUSTOM_SEGMENT_SIZE == 0);
+
+ assertTrue(vec2.getAllocator().getRoundingPolicy() instanceof DefaultRoundingPolicy);
+ vec2.allocateNew(50);
+ totalCapacity = vec2.getValidityBuffer().capacity() + vec2.getDataBuffer().capacity();
+
+ // the total capacity must be a power of two
+ assertEquals(totalCapacity & (totalCapacity - 1), 0);
+ }
+ }
+
+ @Test
+ public void testVariableWidthVectorAllocation() {
+ try (VarCharVector vec1 = new VarCharVector("vec", policyAllocator);
+ VarCharVector vec2 = new VarCharVector("vec", rootAllocator)) {
+ assertTrue(vec1.getAllocator().getRoundingPolicy() instanceof CustomPolicy);
+ vec1.allocateNew(50);
+ long totalCapacity = vec1.getValidityBuffer().capacity() + vec1.getOffsetBuffer().capacity();
+
+ // the total capacity must be a multiple of the segment size
+ assertTrue(totalCapacity % CUSTOM_SEGMENT_SIZE == 0);
+
+ assertTrue(vec2.getAllocator().getRoundingPolicy() instanceof DefaultRoundingPolicy);
+ vec2.allocateNew(50);
+ totalCapacity = vec2.getValidityBuffer().capacity() + vec2.getOffsetBuffer().capacity();
+
+ // the total capacity must be a power of two
+ assertEquals(totalCapacity & (totalCapacity - 1), 0);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
new file mode 100644
index 000000000..18bb2c957
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
@@ -0,0 +1,474 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.*;
+
+import java.nio.charset.StandardCharsets;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+
+public class TestVectorReAlloc {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testFixedType() {
+ try (final UInt4Vector vector = new UInt4Vector("", allocator)) {
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertTrue(vector.getValueCapacity() >= 512);
+ int initialCapacity = vector.getValueCapacity();
+
+ try {
+ vector.set(initialCapacity, 0);
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ vector.set(initialCapacity, 100);
+ assertEquals(100, vector.get(initialCapacity));
+ }
+ }
+
+ @Test
+ public void testNullableType() {
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertTrue(vector.getValueCapacity() >= 512);
+ int initialCapacity = vector.getValueCapacity();
+
+ try {
+ vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
+ assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8));
+ }
+ }
+
+ @Test
+ public void testListType() {
+ try (final ListVector vector = ListVector.empty("", allocator)) {
+ vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertEquals(512, vector.getValueCapacity());
+
+ try {
+ vector.getInnerValueCountAt(2014);
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertEquals(1024, vector.getValueCapacity());
+ assertEquals(0, vector.getOffsetBuffer().getInt(2014 * ListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testStructType() {
+ try (final StructVector vector = StructVector.empty("", allocator)) {
+ vector.addOrGet("", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertEquals(512, vector.getValueCapacity());
+
+ try {
+ vector.getObject(513);
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertEquals(1024, vector.getValueCapacity());
+ assertNull(vector.getObject(513));
+ }
+ }
+
+ @Test
+ public void testVariableWidthTypeSetNullValues() {
+ // Test ARROW-11223 bug is fixed
+ try (final BaseVariableWidthVector v1 = new VarCharVector("var1", allocator)) {
+ v1.setInitialCapacity(512);
+ v1.allocateNew();
+ int numNullValues1 = v1.getValueCapacity() + 1;
+ for (int i = 0; i < numNullValues1; i++) {
+ v1.setNull(i);
+ }
+ Assert.assertTrue(v1.getBufferSizeFor(numNullValues1) > 0);
+ }
+
+ try (final BaseLargeVariableWidthVector v2 = new LargeVarCharVector("var2", allocator)) {
+ v2.setInitialCapacity(512);
+ v2.allocateNew();
+ int numNullValues2 = v2.getValueCapacity() + 1;
+ for (int i = 0; i < numNullValues2; i++) {
+ v2.setNull(i);
+ }
+ Assert.assertTrue(v2.getBufferSizeFor(numNullValues2) > 0);
+ }
+ }
+
+ @Test
+ public void testFixedAllocateAfterReAlloc() throws Exception {
+ try (final IntVector vector = new IntVector("", allocator)) {
+ /*
+ * Allocate the default size, and then, reAlloc. This should double the allocation.
+ */
+ vector.allocateNewSafe(); // Initial allocation
+ vector.reAlloc(); // Double the allocation size.
+ int savedValueCapacity = vector.getValueCapacity();
+
+ /*
+ * Clear and allocate again.
+ */
+ vector.clear();
+ vector.allocateNewSafe();
+
+ /*
+ * Verify that the buffer sizes haven't changed.
+ */
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testVariableAllocateAfterReAlloc() throws Exception {
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ /*
+ * Allocate the default size, and then, reAlloc. This should double the allocation.
+ */
+ vector.allocateNewSafe(); // Initial allocation
+ vector.reAlloc(); // Double the allocation size.
+ int savedValueCapacity = vector.getValueCapacity();
+ long savedValueBufferSize = vector.valueBuffer.capacity();
+
+ /*
+ * Clear and allocate again.
+ */
+ vector.clear();
+ vector.allocateNewSafe();
+
+ /*
+ * Verify that the buffer sizes haven't changed.
+ */
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize);
+ }
+ }
+
+ @Test
+ public void testLargeVariableAllocateAfterReAlloc() throws Exception {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ /*
+ * Allocate the default size, and then, reAlloc. This should double the allocation.
+ */
+ vector.allocateNewSafe(); // Initial allocation
+ vector.reAlloc(); // Double the allocation size.
+ int savedValueCapacity = vector.getValueCapacity();
+ long savedValueBufferSize = vector.valueBuffer.capacity();
+
+ /*
+ * Clear and allocate again.
+ */
+ vector.clear();
+ vector.allocateNewSafe();
+
+ /*
+ * Verify that the buffer sizes haven't changed.
+ */
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize);
+ }
+ }
+
+ @Test
+ public void testVarCharAllocateNew() throws Exception {
+ final int count = 6000;
+
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.allocateNew(count);
+
+ // verify that the validity buffer and value buffer have capacity for atleast 'count' elements.
+ Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
+ Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH);
+ }
+ }
+
+ @Test
+ public void testLargeVarCharAllocateNew() throws Exception {
+ final int count = 6000;
+
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ vector.allocateNew(count);
+
+ // verify that the validity buffer and value buffer have capacity for atleast 'count' elements.
+ Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
+ Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ }
+ }
+
+ @Test
+ public void testVarCharAllocateNewUsingHelper() throws Exception {
+ final int count = 6000;
+
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ AllocationHelper.allocateNew(vector, count);
+
+ // verify that the validity buffer and value buffer have capacity for atleast 'count' elements.
+ Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
+ Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH);
+ }
+ }
+
+ @Test
+ public void testLargeVarCharAllocateNewUsingHelper() throws Exception {
+ final int count = 6000;
+
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ AllocationHelper.allocateNew(vector, count);
+
+ // verify that the validity buffer and value buffer have capacity for atleast 'count' elements.
+ Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
+ Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ }
+ }
+
+ @Test
+ public void testFixedRepeatedClearAndSet() throws Exception {
+ try (final IntVector vector = new IntVector("", allocator)) {
+ vector.allocateNewSafe(); // Initial allocation
+ vector.clear(); // clear vector.
+ vector.setSafe(0, 10);
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ vector.setSafe(0, 10);
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testVariableRepeatedClearAndSet() throws Exception {
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.allocateNewSafe(); // Initial allocation
+
+ vector.clear(); // clear vector.
+ vector.setSafe(0, "hello world".getBytes());
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ vector.setSafe(0, "hello world".getBytes());
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testRepeatedValueVectorClearAndSet() throws Exception {
+ try (final ListVector vector = new ListVector("", allocator, FieldType.nullable(MinorType.INT.getType()), null)) {
+ vector.allocateNewSafe(); // Initial allocation
+ UnionListWriter writer = vector.getWriter();
+
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.writeInt(0);
+ writer.endList();
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.writeInt(i);
+ writer.endList();
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testStructVectorClearAndSet() throws Exception {
+ try (final StructVector vector = StructVector.empty("v", allocator)) {
+ vector.allocateNewSafe(); // Initial allocation
+
+ NullableStructWriter writer = vector.getWriter();
+
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.start();
+ writer.integer("int").writeInt(0);
+ writer.end();
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.start();
+ writer.integer("int").writeInt(i);
+ writer.end();
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testFixedSizeListVectorClearAndSet() {
+ try (final FixedSizeListVector vector = new FixedSizeListVector("", allocator,
+ FieldType.nullable(new ArrowType.FixedSizeList(2)), null)) {
+ vector.allocateNewSafe(); // Initial allocation
+ UnionFixedSizeListWriter writer = vector.getWriter();
+
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.writeInt(0);
+ writer.writeInt(1);
+ writer.endList();
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.writeInt(i);
+ writer.writeInt(i + 1);
+ writer.endList();
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testUnionVectorClearAndSet() {
+ try (final UnionVector vector = new UnionVector("", allocator, /* field type */ null, /* call-back */ null)) {
+ vector.allocateNewSafe(); // Initial allocation
+
+ NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = 1;
+
+ vector.clear(); // clear vector.
+ vector.setType(0, MinorType.INT);
+ vector.setSafe(0, holder);
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ vector.setType(0, MinorType.INT);
+ vector.setSafe(0, holder);
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testDenseUnionVectorClearAndSet() {
+ try (final DenseUnionVector vector = new DenseUnionVector("", allocator, null, null)) {
+ vector.allocateNewSafe(); // Initial allocation
+
+ NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = 1;
+
+ byte intTypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+
+ vector.clear();
+ vector.setTypeId(0, intTypeId);
+ vector.setSafe(0, holder);
+
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear();
+ vector.setTypeId(0, intTypeId);
+ vector.setSafe(0, holder);
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
new file mode 100644
index 000000000..71009a333
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.StandardCharsets;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVectorReset {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ private void resetVectorAndVerify(ValueVector vector, ArrowBuf[] bufs) {
+ long[] sizeBefore = new long[bufs.length];
+ for (int i = 0; i < bufs.length; i++) {
+ sizeBefore[i] = bufs[i].capacity();
+ }
+ vector.reset();
+ for (int i = 0; i < bufs.length; i++) {
+ assertEquals(sizeBefore[i], bufs[i].capacity());
+ verifyBufferZeroed(bufs[i]);
+ }
+ assertEquals(0, vector.getValueCount());
+ }
+
+ private void verifyBufferZeroed(ArrowBuf buf) {
+ for (int i = 0; i < buf.capacity(); i++) {
+ assertTrue((byte) 0 == buf.getByte(i));
+ }
+ }
+
+ @Test
+ public void testFixedTypeReset() {
+ try (final UInt4Vector vector = new UInt4Vector("UInt4", allocator)) {
+ vector.allocateNewSafe();
+ vector.setNull(0);
+ vector.setValueCount(1);
+ resetVectorAndVerify(vector, vector.getBuffers(false));
+ }
+ }
+
+ @Test
+ public void testVariableTypeReset() {
+ try (final VarCharVector vector = new VarCharVector("VarChar", allocator)) {
+ vector.allocateNewSafe();
+ vector.set(0, "a".getBytes(StandardCharsets.UTF_8));
+ vector.setLastSet(0);
+ vector.setValueCount(1);
+ resetVectorAndVerify(vector, vector.getBuffers(false));
+ assertEquals(-1, vector.getLastSet());
+ }
+ }
+
+ @Test
+ public void testLargeVariableTypeReset() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) {
+ vector.allocateNewSafe();
+ vector.set(0, "a".getBytes(StandardCharsets.UTF_8));
+ vector.setLastSet(0);
+ vector.setValueCount(1);
+ resetVectorAndVerify(vector, vector.getBuffers(false));
+ assertEquals(-1, vector.getLastSet());
+ }
+ }
+
+ @Test
+ public void testListTypeReset() {
+ try (final ListVector variableList =
+ new ListVector("VarList", allocator, FieldType.nullable(MinorType.INT.getType()), null);
+ final FixedSizeListVector fixedList =
+ new FixedSizeListVector("FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), null)
+ ) {
+ // ListVector
+ variableList.allocateNewSafe();
+ variableList.startNewValue(0);
+ variableList.endValue(0, 0);
+ variableList.setValueCount(1);
+ resetVectorAndVerify(variableList, variableList.getBuffers(false));
+ assertEquals(-1, variableList.getLastSet());
+
+ // FixedSizeListVector
+ fixedList.allocateNewSafe();
+ fixedList.setNull(0);
+ fixedList.setValueCount(1);
+ resetVectorAndVerify(fixedList, fixedList.getBuffers(false));
+ }
+ }
+
+ @Test
+ public void testStructTypeReset() {
+ try (final NonNullableStructVector nonNullableStructVector =
+ new NonNullableStructVector("Struct", allocator, FieldType.nullable(MinorType.INT.getType()), null);
+ final StructVector structVector =
+ new StructVector("NullableStruct", allocator, FieldType.nullable(MinorType.INT.getType()), null)
+ ) {
+ // NonNullableStructVector
+ nonNullableStructVector.allocateNewSafe();
+ IntVector structChild = nonNullableStructVector
+ .addOrGet("child", FieldType.nullable(new Int(32, true)), IntVector.class);
+ structChild.setNull(0);
+ nonNullableStructVector.setValueCount(1);
+ resetVectorAndVerify(nonNullableStructVector, nonNullableStructVector.getBuffers(false));
+
+ // StructVector
+ structVector.allocateNewSafe();
+ structVector.setNull(0);
+ structVector.setValueCount(1);
+ resetVectorAndVerify(structVector, structVector.getBuffers(false));
+ }
+ }
+
+ @Test
+ public void testUnionTypeReset() {
+ try (final UnionVector vector = new UnionVector("Union", allocator, /* field type */ null, /* call-back */ null);
+ final IntVector dataVector = new IntVector("Int", allocator)
+ ) {
+ vector.getBufferSize();
+ vector.allocateNewSafe();
+ dataVector.allocateNewSafe();
+ vector.addVector(dataVector);
+ dataVector.setNull(0);
+ vector.setValueCount(1);
+ resetVectorAndVerify(vector, vector.getBuffers(false));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java
new file mode 100644
index 000000000..4c5b6540f
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVectorSchemaRoot {
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() {
+ allocator.close();
+ }
+
+ @Test
+ public void testResetRowCount() {
+ final int size = 20;
+ try (final BitVector vec1 = new BitVector("bit", allocator);
+ final IntVector vec2 = new IntVector("int", allocator)) {
+ VectorSchemaRoot vsr = VectorSchemaRoot.of(vec1, vec2);
+
+ vsr.allocateNew();
+ assertEquals(vsr.getRowCount(), 0);
+
+ for (int i = 0; i < size; i++) {
+ vec1.setSafe(i, i % 2);
+ vec2.setSafe(i, i);
+ }
+ vsr.setRowCount(size);
+ checkCount(vec1, vec2, vsr, size);
+
+ vsr.allocateNew();
+ checkCount(vec1, vec2, vsr, 0);
+
+ for (int i = 0; i < size; i++) {
+ vec1.setSafe(i, i % 2);
+ vec2.setSafe(i, i);
+ }
+ vsr.setRowCount(size);
+ checkCount(vec1, vec2, vsr, size);
+
+ vsr.clear();
+ checkCount(vec1, vec2, vsr, 0);
+ }
+ }
+
+ private void checkCount(BitVector vec1, IntVector vec2, VectorSchemaRoot vsr, int count) {
+ assertEquals(vec1.getValueCount(), count);
+ assertEquals(vec2.getValueCount(), count);
+ assertEquals(vsr.getRowCount(), count);
+ }
+
+ private VectorSchemaRoot createBatch() {
+ FieldType varCharType = new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null);
+ FieldType listType = new FieldType(true, new ArrowType.List(), /*dictionary=*/null);
+
+ // create the schema
+ List<Field> schemaFields = new ArrayList<>();
+ Field childField = new Field("varCharCol", varCharType, null);
+ List<Field> childFields = new ArrayList<>();
+ childFields.add(childField);
+ schemaFields.add(new Field("listCol", listType, childFields));
+ Schema schema = new Schema(schemaFields);
+
+ VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(schema, allocator);
+ // get and allocate the vector
+ ListVector vector = (ListVector) schemaRoot.getVector("listCol");
+ vector.allocateNew();
+
+ // write data to the vector
+ UnionListWriter writer = vector.getWriter();
+
+ writer.setPosition(0);
+
+ // write data vector(0)
+ writer.startList();
+
+ // write data vector(0)(0)
+ writer.list().startList();
+
+ // According to the schema above, the list element should have varchar type.
+ // When we write a big int, the original writer cannot handle this, so the writer will
+ // be promoted, and the vector structure will be different from the schema.
+ writer.list().bigInt().writeBigInt(0);
+ writer.list().bigInt().writeBigInt(1);
+ writer.list().endList();
+
+ // write data vector(0)(1)
+ writer.list().startList();
+ writer.list().float8().writeFloat8(3.0D);
+ writer.list().float8().writeFloat8(7.0D);
+ writer.list().endList();
+
+ // finish data vector(0)
+ writer.endList();
+
+ writer.setPosition(1);
+
+ // write data vector(1)
+ writer.startList();
+
+ // write data vector(1)(0)
+ writer.list().startList();
+ writer.list().integer().writeInt(3);
+ writer.list().integer().writeInt(2);
+ writer.list().endList();
+
+ // finish data vector(1)
+ writer.endList();
+
+ vector.setValueCount(2);
+
+ return schemaRoot;
+ }
+
+ @Test
+ public void testAddVector() {
+ try (final IntVector intVector1 = new IntVector("intVector1", allocator);
+ final IntVector intVector2 = new IntVector("intVector2", allocator);
+ final IntVector intVector3 = new IntVector("intVector3", allocator);) {
+
+ VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector1, intVector2));
+ assertEquals(2, original.getFieldVectors().size());
+
+ VectorSchemaRoot newRecordBatch = original.addVector(1, intVector3);
+ assertEquals(3, newRecordBatch.getFieldVectors().size());
+ assertEquals(intVector3, newRecordBatch.getFieldVectors().get(1));
+
+ original.close();
+ newRecordBatch.close();
+ }
+ }
+
+ @Test
+ public void testRemoveVector() {
+ try (final IntVector intVector1 = new IntVector("intVector1", allocator);
+ final IntVector intVector2 = new IntVector("intVector2", allocator);
+ final IntVector intVector3 = new IntVector("intVector3", allocator);) {
+
+ VectorSchemaRoot original =
+ new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3));
+ assertEquals(3, original.getFieldVectors().size());
+
+ VectorSchemaRoot newRecordBatch = original.removeVector(0);
+ assertEquals(2, newRecordBatch.getFieldVectors().size());
+ assertEquals(intVector2, newRecordBatch.getFieldVectors().get(0));
+ assertEquals(intVector3, newRecordBatch.getFieldVectors().get(1));
+
+ original.close();
+ newRecordBatch.close();
+ }
+ }
+
+ @Test
+ public void testSlice() {
+ try (final IntVector intVector = new IntVector("intVector", allocator);
+ final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) {
+ intVector.setValueCount(10);
+ float4Vector.setValueCount(10);
+ for (int i = 0; i < 10; i++) {
+ intVector.setSafe(i, i);
+ float4Vector.setSafe(i, i + 0.1f);
+ }
+ final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector));
+
+ VectorSchemaRoot slice1 = original.slice(0, original.getRowCount());
+ assertEquals(original, slice1);
+
+ VectorSchemaRoot slice2 = original.slice(0, 5);
+ assertEquals(5, slice2.getRowCount());
+ // validate data
+ IntVector childVector1 = (IntVector) slice2.getFieldVectors().get(0);
+ Float4Vector childVector2 = (Float4Vector) slice2.getFieldVectors().get(1);
+ for (int i = 0; i < 5; i++) {
+ assertEquals(i, childVector1.get(i));
+ assertEquals(i + 0.1f, childVector2.get(i), 0);
+ }
+
+ original.close();
+ slice2.close();
+ }
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testSliceWithInvalidParam() {
+ try (final IntVector intVector = new IntVector("intVector", allocator);
+ final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) {
+ intVector.setValueCount(10);
+ float4Vector.setValueCount(10);
+ for (int i = 0; i < 10; i++) {
+ intVector.setSafe(i, i);
+ float4Vector.setSafe(i, i + 0.1f);
+ }
+ final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector));
+
+ original.slice(0, 20);
+ }
+ }
+
+ @Test
+ public void testEquals() {
+ try (final IntVector intVector1 = new IntVector("intVector1", allocator);
+ final IntVector intVector2 = new IntVector("intVector2", allocator);
+ final IntVector intVector3 = new IntVector("intVector3", allocator);) {
+
+ intVector1.setValueCount(5);
+ for (int i = 0; i < 5; i++) {
+ intVector1.set(i, i);
+ }
+
+ VectorSchemaRoot root1 =
+ new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3));
+
+ VectorSchemaRoot root2 =
+ new VectorSchemaRoot(Arrays.asList(intVector1, intVector2));
+
+ VectorSchemaRoot root3 =
+ new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3));
+
+ assertFalse(root1.equals(root2));
+ assertTrue(root1.equals(root3));
+
+ root1.close();
+ root2.close();
+ root3.close();
+ }
+ }
+
+ @Test
+ public void testApproxEquals() {
+ try (final Float4Vector float4Vector1 = new Float4Vector("floatVector", allocator);
+ final Float4Vector float4Vector2 = new Float4Vector("floatVector", allocator);
+ final Float4Vector float4Vector3 = new Float4Vector("floatVector", allocator);) {
+
+ float4Vector1.setValueCount(5);
+ float4Vector2.setValueCount(5);
+ float4Vector3.setValueCount(5);
+ final float epsilon = 1.0E-6f;
+ for (int i = 0; i < 5; i++) {
+ float4Vector1.set(i, i);
+ float4Vector2.set(i, i + epsilon * 2);
+ float4Vector3.set(i, i + epsilon / 2);
+ }
+
+ VectorSchemaRoot root1 =
+ new VectorSchemaRoot(Arrays.asList(float4Vector1));
+
+ VectorSchemaRoot root2 =
+ new VectorSchemaRoot(Arrays.asList(float4Vector2));
+
+ VectorSchemaRoot root3 =
+ new VectorSchemaRoot(Arrays.asList(float4Vector3));
+
+ assertFalse(root1.approxEquals(root2));
+ assertTrue(root1.approxEquals(root3));
+
+ root1.close();
+ root2.close();
+ root3.close();
+ }
+ }
+
+ @Test
+ public void testSchemaSync() {
+ //create vector schema root
+ try (VectorSchemaRoot schemaRoot = createBatch()) {
+ Schema newSchema = new Schema(
+ schemaRoot.getFieldVectors().stream().map(vec -> vec.getField()).collect(Collectors.toList()));
+
+ assertNotEquals(newSchema, schemaRoot.getSchema());
+ assertTrue(schemaRoot.syncSchema());
+ assertEquals(newSchema, schemaRoot.getSchema());
+
+ // no schema update this time.
+ assertFalse(schemaRoot.syncSchema());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
new file mode 100644
index 000000000..8e1941a8c
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVectorUnloadLoad {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testUnloadLoad() throws IOException {
+ int count = 10000;
+ Schema schema;
+
+ try (
+ BufferAllocator originalVectorsAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ NonNullableStructVector parent = NonNullableStructVector.empty("parent", originalVectorsAllocator)) {
+
+ // write some data
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ for (int i = 0; i < count; i++) {
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ }
+ writer.setValueCount(count);
+
+ // unload it
+ FieldVector root = parent.getChild("root");
+ schema = new Schema(root.getField().getChildren());
+ VectorUnloader vectorUnloader = newVectorUnloader(root);
+ try (
+ ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);
+ ) {
+
+ // load it
+ VectorLoader vectorLoader = new VectorLoader(newRoot);
+
+ vectorLoader.load(recordBatch);
+
+ FieldReader intReader = newRoot.getVector("int").getReader();
+ FieldReader bigIntReader = newRoot.getVector("bigInt").getReader();
+ for (int i = 0; i < count; i++) {
+ intReader.setPosition(i);
+ Assert.assertEquals(i, intReader.readInteger().intValue());
+ bigIntReader.setPosition(i);
+ Assert.assertEquals(i, bigIntReader.readLong().longValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testUnloadLoadAddPadding() throws IOException {
+ int count = 10000;
+ Schema schema;
+ try (
+ BufferAllocator originalVectorsAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ NonNullableStructVector parent = NonNullableStructVector.empty("parent", originalVectorsAllocator)) {
+
+ // write some data
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ ListWriter list = rootWriter.list("list");
+ IntWriter intWriter = list.integer();
+ for (int i = 0; i < count; i++) {
+ list.setPosition(i);
+ list.startList();
+ for (int j = 0; j < i % 4 + 1; j++) {
+ intWriter.writeInt(i);
+ }
+ list.endList();
+ }
+ writer.setValueCount(count);
+
+ // unload it
+ FieldVector root = parent.getChild("root");
+ schema = new Schema(root.getField().getChildren());
+ VectorUnloader vectorUnloader = newVectorUnloader(root);
+ try (
+ ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);
+ ) {
+ List<ArrowBuf> oldBuffers = recordBatch.getBuffers();
+ List<ArrowBuf> newBuffers = new ArrayList<>();
+ for (ArrowBuf oldBuffer : oldBuffers) {
+ long l = oldBuffer.readableBytes();
+ if (l % 64 != 0) {
+ // pad
+ l = l + 64 - l % 64;
+ }
+ ArrowBuf newBuffer = allocator.buffer(l);
+ for (long i = oldBuffer.readerIndex(); i < oldBuffer.writerIndex(); i++) {
+ newBuffer.setByte(i - oldBuffer.readerIndex(), oldBuffer.getByte(i));
+ }
+ newBuffer.readerIndex(0);
+ newBuffer.writerIndex(l);
+ newBuffers.add(newBuffer);
+ }
+
+ try (ArrowRecordBatch newBatch =
+ new ArrowRecordBatch(recordBatch.getLength(), recordBatch.getNodes(), newBuffers);) {
+ // load it
+ VectorLoader vectorLoader = new VectorLoader(newRoot);
+
+ vectorLoader.load(newBatch);
+
+ FieldReader reader = newRoot.getVector("list").getReader();
+ for (int i = 0; i < count; i++) {
+ reader.setPosition(i);
+ List<Integer> expected = new ArrayList<>();
+ for (int j = 0; j < i % 4 + 1; j++) {
+ expected.add(i);
+ }
+ Assert.assertEquals(expected, reader.readObject());
+ }
+ }
+
+ for (ArrowBuf newBuf : newBuffers) {
+ newBuf.getReferenceManager().release();
+ }
+ }
+ }
+ }
+
+ /**
+ * The validity buffer can be empty if:
+ * - all values are defined.
+ * - all values are null.
+ *
+ * @throws IOException on error
+ */
+ @Test
+ public void testLoadValidityBuffer() throws IOException {
+ Schema schema = new Schema(asList(
+ new Field("intDefined", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList()),
+ new Field("intNull", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList())
+ ));
+ int count = 10;
+ ArrowBuf[] values = new ArrowBuf[4];
+ for (int i = 0; i < 4; i += 2) {
+ ArrowBuf buf1 = allocator.buffer(BitVectorHelper.getValidityBufferSize(count));
+ ArrowBuf buf2 = allocator.buffer(count * 4); // integers
+ buf1.setZero(0, buf1.capacity());
+ buf2.setZero(0, buf2.capacity());
+ values[i] = buf1;
+ values[i + 1] = buf2;
+ for (int j = 0; j < count; j++) {
+ if (i == 2) {
+ BitVectorHelper.unsetBit(buf1, j);
+ } else {
+ BitVectorHelper.setBit(buf1, j);
+ }
+
+ buf2.setInt(j * 4, j);
+ }
+ buf1.writerIndex((int) Math.ceil(count / 8));
+ buf2.writerIndex(count * 4);
+ }
+
+ /*
+ * values[0] - validity buffer for first vector
+ * values[1] - data buffer for first vector
+ * values[2] - validity buffer for second vector
+ * values[3] - data buffer for second vector
+ */
+
+ try (
+ ArrowRecordBatch recordBatch = new ArrowRecordBatch(count, asList(new ArrowFieldNode(count, 0),
+ new ArrowFieldNode(count, count)), asList(values[0], values[1], values[2], values[3]));
+ BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);
+ ) {
+
+ // load it
+ VectorLoader vectorLoader = new VectorLoader(newRoot);
+
+ vectorLoader.load(recordBatch);
+
+ IntVector intDefinedVector = (IntVector) newRoot.getVector("intDefined");
+ IntVector intNullVector = (IntVector) newRoot.getVector("intNull");
+ for (int i = 0; i < count; i++) {
+ assertFalse("#" + i, intDefinedVector.isNull(i));
+ assertEquals("#" + i, i, intDefinedVector.get(i));
+ assertTrue("#" + i, intNullVector.isNull(i));
+ }
+ intDefinedVector.setSafe(count + 10, 1234);
+ assertTrue(intDefinedVector.isNull(count + 1));
+ // empty slots should still default to unset
+ intDefinedVector.setSafe(count + 1, 789);
+ assertFalse(intDefinedVector.isNull(count + 1));
+ assertEquals(789, intDefinedVector.get(count + 1));
+ assertTrue(intDefinedVector.isNull(count));
+ assertTrue(intDefinedVector.isNull(count + 2));
+ assertTrue(intDefinedVector.isNull(count + 3));
+ assertTrue(intDefinedVector.isNull(count + 4));
+ assertTrue(intDefinedVector.isNull(count + 5));
+ assertTrue(intDefinedVector.isNull(count + 6));
+ assertTrue(intDefinedVector.isNull(count + 7));
+ assertTrue(intDefinedVector.isNull(count + 8));
+ assertTrue(intDefinedVector.isNull(count + 9));
+ assertFalse(intDefinedVector.isNull(count + 10));
+ assertEquals(1234, intDefinedVector.get(count + 10));
+ } finally {
+ for (ArrowBuf arrowBuf : values) {
+ arrowBuf.getReferenceManager().release();
+ }
+ }
+ }
+
+ @Test
+ public void testUnloadLoadDuplicates() throws IOException {
+ int count = 10;
+ Schema schema = new Schema(asList(
+ new Field("duplicate", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList()),
+ new Field("duplicate", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList())
+ ));
+
+ try (
+ BufferAllocator originalVectorsAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ ) {
+ List<FieldVector> sources = new ArrayList<>();
+ for (Field field : schema.getFields()) {
+ FieldVector vector = field.createVector(originalVectorsAllocator);
+ vector.allocateNew();
+ sources.add(vector);
+ IntVector intVector = (IntVector) vector;
+ for (int i = 0; i < count; i++) {
+ intVector.set(i, i);
+ }
+ intVector.setValueCount(count);
+ }
+
+ try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), sources, count)) {
+ VectorUnloader vectorUnloader = new VectorUnloader(root);
+ try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator =
+ allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);) {
+ // load it
+ VectorLoader vectorLoader = new VectorLoader(newRoot);
+ vectorLoader.load(recordBatch);
+
+ List<FieldVector> targets = newRoot.getFieldVectors();
+ Assert.assertEquals(sources.size(), targets.size());
+ for (int k = 0; k < sources.size(); k++) {
+ IntVector src = (IntVector) sources.get(k);
+ IntVector tgt = (IntVector) targets.get(k);
+ Assert.assertEquals(src.getValueCount(), tgt.getValueCount());
+ for (int i = 0; i < count; i++) {
+ Assert.assertEquals(src.get(i), tgt.get(i));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ public static VectorUnloader newVectorUnloader(FieldVector root) {
+ Schema schema = new Schema(root.getField().getChildren());
+ int valueCount = root.getValueCount();
+ List<FieldVector> fields = root.getChildrenFromFields();
+ VectorSchemaRoot vsr = new VectorSchemaRoot(schema.getFields(), fields, valueCount);
+ return new VectorUnloader(vsr);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java
new file mode 100644
index 000000000..4495881ad
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java
@@ -0,0 +1,740 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.holders.NullableBigIntHolder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+public class TestRangeEqualsVisitor {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ private static final Charset utf8Charset = Charset.forName("UTF-8");
+ private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
+ private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testIntVectorEqualsWithNull() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2);
+ setVector(vector2, 1, null);
+
+ assertFalse(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testEqualsWithTypeChange() {
+ try (final IntVector vector1 = new IntVector("vector", allocator);
+ final IntVector vector2 = new IntVector("vector", allocator);
+ final BigIntVector vector3 = new BigIntVector("vector", allocator)) {
+
+ setVector(vector1, 1, 2);
+ setVector(vector2, 1, 2);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ Range range = new Range(0, 0, 2);
+ assertTrue(vector1.accept(visitor, range));
+ // visitor left vector changed, will reset and check type again
+ assertFalse(vector3.accept(visitor, range));
+ }
+ }
+
+ @Test
+ public void testBaseFixedWidthVectorRangeEqual() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2, 3, 4, 5);
+ setVector(vector2, 11, 2, 3, 4, 55);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ }
+ }
+
+ @Test
+ public void testBaseVariableVectorRangeEquals() {
+ try (final VarCharVector vector1 = new VarCharVector("varchar", allocator);
+ final VarCharVector vector2 = new VarCharVector("varchar", allocator)) {
+
+ setVector(vector1, STR1, STR2, STR3, STR2, STR1);
+ setVector(vector2, STR1, STR2, STR3, STR2, STR1);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ }
+ }
+
+ @Test
+ public void testListVectorWithDifferentChild() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ vector1.allocateNew();
+ vector1.initializeChildrenFromFields(
+ Arrays.asList(Field.nullable("child", new ArrowType.Int(32, true))));
+
+ vector2.allocateNew();
+ vector2.initializeChildrenFromFields(
+ Arrays.asList(Field.nullable("child", new ArrowType.Int(64, true))));
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertFalse(visitor.rangeEquals(new Range(0, 0, 0)));
+ }
+ }
+
+ @Test
+ public void testListVectorRangeEquals() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ UnionListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeListVector(writer1, new int[] {1, 2});
+ writeListVector(writer1, new int[] {3, 4});
+ writeListVector(writer1, new int[] {5, 6});
+ writeListVector(writer1, new int[] {7, 8});
+ writeListVector(writer1, new int[] {9, 10});
+ writer1.setValueCount(5);
+
+ UnionListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeListVector(writer2, new int[] {0, 0});
+ writeListVector(writer2, new int[] {3, 4});
+ writeListVector(writer2, new int[] {5, 6});
+ writeListVector(writer2, new int[] {7, 8});
+ writeListVector(writer2, new int[] {0, 0});
+ writer2.setValueCount(5);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ }
+ }
+
+ @Test
+ public void testFixedSizeListVectorRangeEquals() {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("list", 2, allocator);
+ final FixedSizeListVector vector2 = FixedSizeListVector.empty("list", 2, allocator);) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeFixedSizeListVector(writer1, new int[] {1, 2});
+ writeFixedSizeListVector(writer1, new int[] {3, 4});
+ writeFixedSizeListVector(writer1, new int[] {5, 6});
+ writeFixedSizeListVector(writer1, new int[] {7, 8});
+ writeFixedSizeListVector(writer1, new int[] {9, 10});
+ writer1.setValueCount(5);
+
+ UnionFixedSizeListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeFixedSizeListVector(writer2, new int[] {0, 0});
+ writeFixedSizeListVector(writer2, new int[] {3, 4});
+ writeFixedSizeListVector(writer2, new int[] {5, 6});
+ writeFixedSizeListVector(writer2, new int[] {7, 8});
+ writeFixedSizeListVector(writer2, new int[] {0, 0});
+ writer2.setValueCount(5);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ assertFalse(visitor.rangeEquals(new Range(0, 0, 5)));
+ }
+ }
+
+ @Test
+ public void testLargeVariableWidthVectorRangeEquals() {
+ try (final LargeVarCharVector vector1 = new LargeVarCharVector("vector1", allocator);
+ final LargeVarCharVector vector2 = new LargeVarCharVector("vector2", allocator)) {
+ setVector(vector1, "aaa", "bbb", "ccc", null, "ddd");
+ setVector(vector2, "ccc", "aaa", "bbb", null, "ddd");
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2,
+ (v1, v2) -> new TypeEqualsVisitor(v2, /*check name*/ false, /*check metadata*/ false).equals(v1));
+
+ assertFalse(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 0, /*length*/ 1)));
+ assertTrue(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 1, /*length*/ 1)));
+ assertFalse(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 0, /*length*/ 3)));
+ assertTrue(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 1, /*length*/ 2)));
+ assertTrue(visitor.rangeEquals(new Range(/*left start*/ 3, /*right start*/ 3, /*length*/ 1)));
+ assertTrue(visitor.rangeEquals(new Range(/*left start*/ 3, /*right start*/ 3, /*length*/ 2)));
+ assertFalse(visitor.rangeEquals(new Range(/*left start*/ 2, /*right start*/ 2, /*length*/ 2)));
+ }
+ }
+
+ @Test
+ public void testStructVectorRangeEquals() {
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writeStructVector(writer1, 3, 30L);
+ writeStructVector(writer1, 4, 40L);
+ writeStructVector(writer1, 5, 50L);
+ writer1.setValueCount(5);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 0, 00L);
+ writeStructVector(writer2, 2, 20L);
+ writeStructVector(writer2, 3, 30L);
+ writeStructVector(writer2, 4, 40L);
+ writeStructVector(writer2, 0, 0L);
+ writer2.setValueCount(5);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ }
+ }
+
+ @Test
+ public void testUnionVectorRangeEquals() {
+ try (final UnionVector vector1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector vector2 =
+ new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 10;
+ uInt4Holder.isSet = 1;
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ uInt4Holder.value = 20;
+ uInt4Holder.isSet = 1;
+
+ vector1.setType(0, Types.MinorType.UINT4);
+ vector1.setSafe(0, uInt4Holder);
+
+ vector1.setType(1, Types.MinorType.INT);
+ vector1.setSafe(1, intHolder);
+
+ vector1.setType(2, Types.MinorType.INT);
+ vector1.setSafe(2, intHolder);
+ vector1.setValueCount(3);
+
+ vector2.setType(0, Types.MinorType.UINT4);
+ vector2.setSafe(0, uInt4Holder);
+
+ vector2.setType(1, Types.MinorType.INT);
+ vector2.setSafe(1, intHolder);
+
+ vector2.setType(2, Types.MinorType.INT);
+ vector2.setSafe(2, intHolder);
+ vector2.setValueCount(3);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 2)));
+ }
+ }
+
+ /**
+ * Test comparing two union vectors.
+ * The two vectors are different in total, but have a range with equal values.
+ */
+ @Test
+ public void testUnionVectorSubRangeEquals() {
+ try (final UnionVector vector1 = new UnionVector("union", allocator, null, null);
+ final UnionVector vector2 = new UnionVector("union", allocator, null, null);) {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 10;
+ uInt4Holder.isSet = 1;
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.value = 20;
+ intHolder.isSet = 1;
+
+ vector1.setType(0, Types.MinorType.UINT4);
+ vector1.setSafe(0, uInt4Holder);
+
+ vector1.setType(1, Types.MinorType.INT);
+ vector1.setSafe(1, intHolder);
+
+ vector1.setType(2, Types.MinorType.INT);
+ vector1.setSafe(2, intHolder);
+
+ vector1.setType(3, Types.MinorType.INT);
+ vector1.setSafe(3, intHolder);
+
+ vector1.setValueCount(4);
+
+ vector2.setType(0, Types.MinorType.UINT4);
+ vector2.setSafe(0, uInt4Holder);
+
+ vector2.setType(1, Types.MinorType.INT);
+ vector2.setSafe(1, intHolder);
+
+ vector2.setType(2, Types.MinorType.INT);
+ vector2.setSafe(2, intHolder);
+
+ vector2.setType(3, Types.MinorType.UINT4);
+ vector2.setSafe(3, uInt4Holder);
+
+ vector2.setValueCount(4);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertFalse(visitor.rangeEquals(new Range(0, 0, 4)));
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 2)));
+ }
+ }
+
+ @Test
+ public void testDenseUnionVectorEquals() {
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+ intHolder.value = 100;
+
+ final NullableBigIntHolder bigIntHolder = new NullableBigIntHolder();
+ bigIntHolder.isSet = 1;
+ bigIntHolder.value = 200L;
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.isSet = 1;
+ float4Holder.value = 400F;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.isSet = 1;
+ float8Holder.value = 800D;
+
+ try (DenseUnionVector vector1 = new DenseUnionVector("vector1", allocator, null, null);
+ DenseUnionVector vector2 = new DenseUnionVector("vector2", allocator, null, null)) {
+ vector1.allocateNew();
+ vector2.allocateNew();
+
+ // populate vector1: {100, 200L, null, 400F, 800D}
+ byte intTypeId = vector1.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType()));
+ byte longTypeId = vector1.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType()));
+ byte floatTypeId = vector1.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ byte doubleTypeId = vector1.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ vector1.setTypeId(0, intTypeId);
+ vector1.setSafe(0, intHolder);
+
+ vector1.setTypeId(1, longTypeId);
+ vector1.setSafe(1, bigIntHolder);
+
+ vector1.setTypeId(3, floatTypeId);
+ vector1.setSafe(3, float4Holder);
+
+ vector1.setTypeId(4, doubleTypeId);
+ vector1.setSafe(4, float8Holder);
+
+ vector1.setValueCount(5);
+
+ // populate vector2: {400F, null, 200L, null, 400F, 800D, 100}
+ intTypeId = vector2.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType()));
+ longTypeId = vector2.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType()));
+ floatTypeId = vector2.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ doubleTypeId = vector2.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ vector2.setTypeId(0, floatTypeId);
+ vector2.setSafe(0, float4Holder);
+
+ vector2.setTypeId(2, longTypeId);
+ vector2.setSafe(2, bigIntHolder);
+
+ vector2.setTypeId(4, floatTypeId);
+ vector2.setSafe(4, float4Holder);
+
+ vector2.setTypeId(5, doubleTypeId);
+ vector2.setSafe(5, float8Holder);
+
+ vector2.setTypeId(6, intTypeId);
+ vector2.setSafe(6, intHolder);
+
+ vector2.setValueCount(7);
+
+ // compare ranges
+ TypeEqualsVisitor typeVisitor =
+ new TypeEqualsVisitor(vector2, /* check name */ false, /* check meta data */ true);
+ RangeEqualsVisitor equalsVisitor =
+ new RangeEqualsVisitor(vector1, vector2, (left, right) -> typeVisitor.equals(left));
+
+ // different ranges {100, 200L} != {400F, null}
+ assertFalse(equalsVisitor.rangeEquals(new Range(0, 0, 2)));
+
+ // different ranges without null {100, 200L} != {400F, null}
+ assertFalse(equalsVisitor.rangeEquals(new Range(3, 5, 2)));
+
+ // equal ranges {200L, null, 400F, 800D}
+ assertTrue(equalsVisitor.rangeEquals(new Range(1, 2, 4)));
+
+ // equal ranges without null {400F, 800D}
+ assertTrue(equalsVisitor.rangeEquals(new Range(3, 4, 2)));
+
+ // equal ranges with only null {null}
+ assertTrue(equalsVisitor.rangeEquals(new Range(2, 3, 1)));
+
+ // equal ranges with single element {100}
+ assertTrue(equalsVisitor.rangeEquals(new Range(0, 6, 1)));
+
+ // different ranges with single element {100} != {200L}
+ assertFalse(equalsVisitor.rangeEquals(new Range(0, 2, 1)));
+ }
+ }
+
+ @Ignore
+ @Test
+ public void testEqualsWithOutTypeCheck() {
+ try (final IntVector intVector = new IntVector("int", allocator);
+ final ZeroVector zeroVector = new ZeroVector("zero")) {
+
+ assertTrue(VectorEqualsVisitor.vectorEquals(intVector, zeroVector, null));
+ assertTrue(VectorEqualsVisitor.vectorEquals(zeroVector, intVector, null));
+ }
+ }
+
+ @Test
+ public void testFloat4ApproxEquals() {
+ try (final Float4Vector vector1 = new Float4Vector("float", allocator);
+ final Float4Vector vector2 = new Float4Vector("float", allocator);
+ final Float4Vector vector3 = new Float4Vector("float", allocator)) {
+
+ final float epsilon = 1.0E-6f;
+ setVector(vector1, 1.1f, 2.2f);
+ setVector(vector2, 1.1f + epsilon / 2, 2.2f + epsilon / 2);
+ setVector(vector3, 1.1f + epsilon * 2, 2.2f + epsilon * 2);
+
+ Range range = new Range(0, 0, vector1.getValueCount());
+
+ ApproxEqualsVisitor visitor12 = new ApproxEqualsVisitor(vector1, vector2, epsilon, epsilon);
+ assertTrue(visitor12.rangeEquals(range));
+
+ ApproxEqualsVisitor visitor13 = new ApproxEqualsVisitor(vector1, vector3, epsilon, epsilon);
+ assertFalse(visitor13.rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testFloat8ApproxEquals() {
+ try (final Float8Vector vector1 = new Float8Vector("float", allocator);
+ final Float8Vector vector2 = new Float8Vector("float", allocator);
+ final Float8Vector vector3 = new Float8Vector("float", allocator)) {
+
+ final float epsilon = 1.0E-6f;
+ setVector(vector1, 1.1, 2.2);
+ setVector(vector2, 1.1 + epsilon / 2, 2.2 + epsilon / 2);
+ setVector(vector3, 1.1 + epsilon * 2, 2.2 + epsilon * 2);
+
+ Range range = new Range(0, 0, vector1.getValueCount());
+ assertTrue(new ApproxEqualsVisitor(vector1, vector2, epsilon, epsilon).rangeEquals(range));
+ assertFalse(new ApproxEqualsVisitor(vector1, vector3, epsilon, epsilon).rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testStructVectorApproxEquals() {
+ try (final StructVector right = StructVector.empty("struct", allocator);
+ final StructVector left1 = StructVector.empty("struct", allocator);
+ final StructVector left2 = StructVector.empty("struct", allocator)) {
+ right.addOrGet("f0",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ right.addOrGet("f1",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), Float8Vector.class);
+ left1.addOrGet("f0",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ left1.addOrGet("f1",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), Float8Vector.class);
+ left2.addOrGet("f0",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ left2.addOrGet("f1",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), Float8Vector.class);
+
+ final float epsilon = 1.0E-6f;
+
+ NullableStructWriter rightWriter = right.getWriter();
+ rightWriter.allocate();
+ writeStructVector(rightWriter, 1.1f, 2.2);
+ writeStructVector(rightWriter, 2.02f, 4.04);
+ rightWriter.setValueCount(2);
+
+ NullableStructWriter leftWriter1 = left1.getWriter();
+ leftWriter1.allocate();
+ writeStructVector(leftWriter1, 1.1f + epsilon / 2, 2.2 + epsilon / 2);
+ writeStructVector(leftWriter1, 2.02f - epsilon / 2, 4.04 - epsilon / 2);
+ leftWriter1.setValueCount(2);
+
+ NullableStructWriter leftWriter2 = left2.getWriter();
+ leftWriter2.allocate();
+ writeStructVector(leftWriter2, 1.1f + epsilon * 2, 2.2 + epsilon * 2);
+ writeStructVector(leftWriter2, 2.02f - epsilon * 2, 4.04 - epsilon * 2);
+ leftWriter2.setValueCount(2);
+
+ Range range = new Range(0, 0, right.getValueCount());
+ assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range));
+ assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testUnionVectorApproxEquals() {
+ try (final UnionVector right = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector left1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector left2 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ final float epsilon = 1.0E-6f;
+
+ right.setType(0, Types.MinorType.FLOAT4);
+ right.setSafe(0, float4Holder);
+ right.setType(1, Types.MinorType.FLOAT8);
+ right.setSafe(1, float8Holder);
+ right.setValueCount(2);
+
+ float4Holder.value += epsilon / 2;
+ float8Holder.value += epsilon / 2;
+
+ left1.setType(0, Types.MinorType.FLOAT4);
+ left1.setSafe(0, float4Holder);
+ left1.setType(1, Types.MinorType.FLOAT8);
+ left1.setSafe(1, float8Holder);
+ left1.setValueCount(2);
+
+ float4Holder.value += epsilon * 2;
+ float8Holder.value += epsilon * 2;
+
+ left2.setType(0, Types.MinorType.FLOAT4);
+ left2.setSafe(0, float4Holder);
+ left2.setType(1, Types.MinorType.FLOAT8);
+ left2.setSafe(1, float8Holder);
+ left2.setValueCount(2);
+
+ Range range = new Range(0, 0, right.getValueCount());
+ assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range));
+ assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testDenseUnionVectorApproxEquals() {
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.isSet = 1;
+
+ final float floatEpsilon = 0.02F;
+ final double doubleEpsilon = 0.02;
+
+ try (final DenseUnionVector vector1 = new DenseUnionVector("vector1", allocator, null, null);
+ final DenseUnionVector vector2 = new DenseUnionVector("vector2", allocator, null, null);
+ final DenseUnionVector vector3 = new DenseUnionVector("vector2", allocator, null, null)) {
+
+ vector1.allocateNew();
+ vector2.allocateNew();
+ vector3.allocateNew();
+
+ // populate vector1: {1.0f, 2.0D}
+ byte floatTypeId = vector1.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ byte doubleTypeId = vector1.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ float4Holder.value = 1.0f;
+ vector1.setTypeId(0, floatTypeId);
+ vector1.setSafe(0, float4Holder);
+ float8Holder.value = 2.0D;
+ vector1.setTypeId(1, doubleTypeId);
+ vector1.setSafe(1, float8Holder);
+ vector1.setValueCount(2);
+
+ // populate vector2: {1.01f, 2.01D}
+ floatTypeId = vector2.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ doubleTypeId = vector2.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ float4Holder.value = 1.01f;
+ vector2.setTypeId(0, floatTypeId);
+ vector2.setSafe(0, float4Holder);
+ float8Holder.value = 2.01D;
+ vector2.setTypeId(1, doubleTypeId);
+ vector2.setSafe(1, float8Holder);
+ vector2.setValueCount(2);
+
+ // populate vector3: {1.05f, 2.05D}
+ floatTypeId = vector3.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ doubleTypeId = vector3.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ float4Holder.value = 1.05f;
+ vector3.setTypeId(0, floatTypeId);
+ vector3.setSafe(0, float4Holder);
+ float8Holder.value = 2.05D;
+ vector3.setTypeId(1, doubleTypeId);
+ vector3.setSafe(1, float8Holder);
+ vector3.setValueCount(2);
+
+ // verify comparison results
+ Range range = new Range(0, 0, 2);
+
+ // compare vector1 and vector2
+ ApproxEqualsVisitor approxEqualsVisitor = new ApproxEqualsVisitor(
+ vector1, vector2,
+ new ValueEpsilonEqualizers.Float4EpsilonEqualizer(floatEpsilon),
+ new ValueEpsilonEqualizers.Float8EpsilonEqualizer(doubleEpsilon),
+ (v1, v2) -> new TypeEqualsVisitor(v2, /* check name */ false, /* check meta */ true).equals(v1));
+ assertTrue(approxEqualsVisitor.rangeEquals(range));
+
+ // compare vector1 and vector3
+ approxEqualsVisitor = new ApproxEqualsVisitor(
+ vector1, vector3,
+ new ValueEpsilonEqualizers.Float4EpsilonEqualizer(floatEpsilon),
+ new ValueEpsilonEqualizers.Float8EpsilonEqualizer(doubleEpsilon),
+ (v1, v2) -> new TypeEqualsVisitor(v2, /* check name */ false, /* check meta */ true).equals(v1));
+ assertFalse(approxEqualsVisitor.rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testListVectorApproxEquals() {
+ try (final ListVector right = ListVector.empty("list", allocator);
+ final ListVector left1 = ListVector.empty("list", allocator);
+ final ListVector left2 = ListVector.empty("list", allocator);) {
+
+ final float epsilon = 1.0E-6f;
+
+ UnionListWriter rightWriter = right.getWriter();
+ rightWriter.allocate();
+ writeListVector(rightWriter, new double[] {1, 2});
+ writeListVector(rightWriter, new double[] {1.01, 2.02});
+ rightWriter.setValueCount(2);
+
+ UnionListWriter leftWriter1 = left1.getWriter();
+ leftWriter1.allocate();
+ writeListVector(leftWriter1, new double[] {1, 2});
+ writeListVector(leftWriter1, new double[] {1.01 + epsilon / 2, 2.02 - epsilon / 2});
+ leftWriter1.setValueCount(2);
+
+ UnionListWriter leftWriter2 = left2.getWriter();
+ leftWriter2.allocate();
+ writeListVector(leftWriter2, new double[] {1, 2});
+ writeListVector(leftWriter2, new double[] {1.01 + epsilon * 2, 2.02 - epsilon * 2});
+ leftWriter2.setValueCount(2);
+
+ Range range = new Range(0, 0, right.getValueCount());
+ assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range));
+ assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range));
+ }
+ }
+
+ private void writeStructVector(NullableStructWriter writer, int value1, long value2) {
+ writer.start();
+ writer.integer("f0").writeInt(value1);
+ writer.bigInt("f1").writeBigInt(value2);
+ writer.end();
+ }
+
+ private void writeStructVector(NullableStructWriter writer, float value1, double value2) {
+ writer.start();
+ writer.float4("f0").writeFloat4(value1);
+ writer.float8("f1").writeFloat8(value2);
+ writer.end();
+ }
+
+ private void writeListVector(UnionListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+
+ private void writeFixedSizeListVector(UnionFixedSizeListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+
+ private void writeListVector(UnionListWriter writer, double[] values) {
+ writer.startList();
+ for (double v: values) {
+ writer.float8().writeFloat8(v);
+ }
+ writer.endList();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java
new file mode 100644
index 000000000..c0a3bd89d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestTypeEqualsVisitor {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ private static final Charset utf8Charset = Charset.forName("UTF-8");
+ private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
+ private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testTypeEqualsWithName() {
+ try (final IntVector right = new IntVector("int", allocator);
+ final IntVector left1 = new IntVector("int", allocator);
+ final IntVector left2 = new IntVector("int2", allocator)) {
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testTypeEqualsWithMetadata() {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+ FieldType typeWithoutMeta = new FieldType(true, new ArrowType.Int(32, true),
+ null, null);
+ FieldType typeWithMeta = new FieldType(true, new ArrowType.Int(32, true),
+ null, metadata);
+
+ try (IntVector right = (IntVector) typeWithoutMeta.createNewSingleVector("int", allocator, null);
+ IntVector left1 = (IntVector) typeWithoutMeta.createNewSingleVector("int", allocator, null);
+ IntVector left2 = (IntVector) typeWithMeta.createNewSingleVector("int", allocator, null)) {
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testListTypeEquals() {
+ try (final ListVector right = ListVector.empty("list", allocator);
+ final ListVector left1 = ListVector.empty("list", allocator);
+ final ListVector left2 = ListVector.empty("list", allocator)) {
+
+ right.addOrGetVector(FieldType.nullable(new ArrowType.Utf8()));
+ left1.addOrGetVector(FieldType.nullable(new ArrowType.Utf8()));
+ left2.addOrGetVector(FieldType.nullable(new ArrowType.FixedSizeBinary(2)));
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testStructTypeEquals() {
+ try (final StructVector right = StructVector.empty("struct", allocator);
+ final StructVector left1 = StructVector.empty("struct", allocator);
+ final StructVector left2 = StructVector.empty("struct", allocator)) {
+
+ right.addOrGet("child", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ left1.addOrGet("child", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ left2.addOrGet("child2", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testUnionTypeEquals() {
+ try (final UnionVector right = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector left1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector left2 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null)) {
+
+ right.addVector(new IntVector("int", allocator));
+ left1.addVector(new IntVector("int", allocator));
+ left2.addVector(new BigIntVector("bigint", allocator));
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testDenseUnionTypeEquals() {
+ try (DenseUnionVector vector1 = new DenseUnionVector("vector1", allocator, null, null);
+ DenseUnionVector vector2 = new DenseUnionVector("vector2", allocator, null, null)) {
+ vector1.allocateNew();
+ vector2.allocateNew();
+
+ // set children for vector1
+ byte intTypeId = vector1.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType()));
+ byte longTypeId = vector1.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType()));
+ byte floatTypeId = vector1.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ byte doubleTypeId = vector1.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ vector1.addVector(floatTypeId, new Float4Vector("", allocator));
+ vector1.addVector(longTypeId, new BigIntVector("", allocator));
+ vector1.addVector(intTypeId, new IntVector("", allocator));
+ vector1.addVector(doubleTypeId, new Float8Vector("", allocator));
+
+ // set children for vector2
+ intTypeId = vector2.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType()));
+ longTypeId = vector2.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType()));
+ floatTypeId = vector2.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ doubleTypeId = vector2.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ // add vectors in a different order
+ vector2.addVector(intTypeId, new IntVector("", allocator));
+ vector2.addVector(floatTypeId, new Float4Vector("", allocator));
+ vector2.addVector(doubleTypeId, new Float8Vector("", allocator));
+ vector2.addVector(longTypeId, new BigIntVector("", allocator));
+
+ // compare ranges
+ TypeEqualsVisitor typeVisitor =
+ new TypeEqualsVisitor(vector2, /* check name */ false, /* check meta data */ true);
+ assertTrue(typeVisitor.equals(vector1));
+
+ // if we check names, the types should be different
+ typeVisitor =
+ new TypeEqualsVisitor(vector2, /* check name */ true, /* check meta data */ true);
+ assertFalse(typeVisitor.equals(vector1));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java
new file mode 100644
index 000000000..f314a98ee
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java
@@ -0,0 +1,763 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.math.BigDecimal;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.DecimalHolder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestComplexCopier {
+
+ private BufferAllocator allocator;
+
+ private static final int COUNT = 100;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testCopyFixedSizeListVector() {
+ try (FixedSizeListVector from = FixedSizeListVector.empty("v", 3, allocator);
+ FixedSizeListVector to = FixedSizeListVector.empty("v", 3, allocator)) {
+
+ from.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+ to.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ // populate from vector
+ UnionFixedSizeListWriter writer = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ writer.startList();
+ writer.integer().writeInt(i);
+ writer.integer().writeInt(i * 2);
+ writer.integer().writeInt(i * 3);
+ writer.endList();
+ }
+ from.setValueCount(COUNT);
+ to.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ }
+ }
+
+ @Test
+ public void testInvalidCopyFixedSizeListVector() {
+ try (FixedSizeListVector from = FixedSizeListVector.empty("v", 3, allocator);
+ FixedSizeListVector to = FixedSizeListVector.empty("v", 2, allocator)) {
+
+ from.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+ to.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ // populate from vector
+ UnionFixedSizeListWriter writer = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ writer.startList();
+ writer.integer().writeInt(i);
+ writer.integer().writeInt(i * 2);
+ writer.integer().writeInt(i * 3);
+ writer.endList();
+ }
+ from.setValueCount(COUNT);
+ to.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ IllegalStateException e = assertThrows(IllegalStateException.class,
+ () -> ComplexCopier.copy(in, out));
+ assertTrue(e.getMessage().contains("greater than listSize"));
+ }
+ }
+
+ @Test
+ public void testCopyMapVector() {
+ try (final MapVector from = MapVector.empty("v", allocator, false);
+ final MapVector to = MapVector.empty("v", allocator, false)) {
+
+ from.allocateNew();
+
+ UnionMapWriter mapWriter = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ mapWriter.setPosition(i);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().integer().writeInt(i);
+ mapWriter.value().integer().writeInt(i);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().decimal().writeDecimal(BigDecimal.valueOf(i * 2));
+ mapWriter.value().decimal().writeDecimal(BigDecimal.valueOf(i * 2));
+ mapWriter.endEntry();
+ mapWriter.endMap();
+ }
+
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListVector() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator)) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ listWriter.startList();
+
+ listWriter.integer().writeInt(i);
+ listWriter.integer().writeInt(i * 2);
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(i);
+ listWriter.list().bigInt().writeBigInt(i * 2);
+ listWriter.list().bigInt().writeBigInt(i * 3);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().decimal().writeDecimal(BigDecimal.valueOf(i * 4));
+ listWriter.list().decimal().writeDecimal(BigDecimal.valueOf(i * 5));
+ listWriter.list().endList();
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ }
+ }
+
+ @Test
+ public void testCopyListVectorToANonEmptyList() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator)) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ listWriter.startList();
+ listWriter.integer().writeInt(i);
+ listWriter.integer().writeInt(i * 2);
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ // Copy again to the target vector which is non-empty
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ // copy using copyFromSafe method
+ for (int i = 0; i < COUNT; i++) {
+ to.copyFromSafe(i, i, from);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListVectorWithNulls() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator)) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ // writer null, [null,i,null,i*2,null] alternatively
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.writeNull();
+ continue;
+ }
+ listWriter.startList();
+ listWriter.integer().writeNull();
+ listWriter.integer().writeInt(i);
+ listWriter.integer().writeNull();
+ listWriter.integer().writeInt(i * 2);
+ listWriter.integer().writeNull();
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListOfListVectorWithNulls() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator);) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ // write null, [null,[50,100,null,200],null,
+ // [null,50,null,100,null,200,null],null] alternatively
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.writeNull();
+ continue;
+ }
+ listWriter.startList();
+ listWriter.list().writeNull();
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+ listWriter.list().writeNull();
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().endList();
+ listWriter.list().writeNull();
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListOStructVectorWithNulls() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator);) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ // write null, [null,{"f1":1,"f2":2},null,
+ // {"f1":1,"f2":2},null] alternatively
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.writeNull();
+ continue;
+ }
+ listWriter.startList();
+ listWriter.struct().writeNull();
+ listWriter.struct().start();
+ listWriter.struct().integer("f1").writeInt(1);
+ listWriter.struct().integer("f2").writeInt(2);
+ listWriter.struct().integer("f3").writeNull();
+ listWriter.struct().end();
+ listWriter.struct().writeNull();
+ listWriter.struct().start();
+ listWriter.struct().integer("f1").writeInt(1);
+ listWriter.struct().integer("f2").writeInt(2);
+ listWriter.struct().integer("f3").writeNull();
+ listWriter.struct().end();
+ listWriter.struct().writeNull();
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListOfListOfStructVectorWithNulls() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator);) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ // write null,
+ // [null,[{"f1":50},null,{"f1":100},null,{"f1":200}],null,
+ // [null,{"f1":50},null,{"f1":100},null,{"f1":200},null],null]
+ // alternatively
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.writeNull();
+ continue;
+ }
+ listWriter.startList();
+ listWriter.list().writeNull();
+ listWriter.list().startList();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(50);
+ listWriter.list().struct().end();
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(100);
+ listWriter.list().struct().end();
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(200);
+ listWriter.list().struct().end();
+ listWriter.list().endList();
+
+ listWriter.list().writeNull();
+
+ listWriter.list().startList();
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(50);
+ listWriter.list().struct().end();
+
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(100);
+ listWriter.list().struct().end();
+
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(200);
+ listWriter.list().struct().end();
+
+ listWriter.list().struct().writeNull();
+ listWriter.list().endList();
+
+ listWriter.list().writeNull();
+
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testMapWithListValue() throws Exception {
+ try (MapVector from = MapVector.empty("map", allocator, false);
+ MapVector to = MapVector.empty("map", allocator, false)) {
+
+ UnionMapWriter mapWriter = from.getWriter();
+ BaseWriter.ListWriter valueWriter;
+
+ /* allocate memory */
+ mapWriter.allocate();
+
+ // write null, [{},
+ // {"value":[]},{"key":1,"value":[null,50,null,100,null,200,null]},
+ // null,{"key":2,"value":[null,75,null,125,null,150,null,175,null]}]
+ // alternatively
+ for (int i = 0; i < COUNT; i++) {
+ mapWriter.setPosition(i);
+ if (i % 2 == 0) {
+ mapWriter.writeNull();
+ continue;
+ }
+
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeNull();
+ mapWriter.value().list().writeNull();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeNull();
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(1);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(50);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(100);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(200);
+ valueWriter.bigInt().writeNull();
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.writeNull();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(2);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(75);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(125);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(150);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(175);
+ valueWriter.bigInt().writeNull();
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+ }
+ mapWriter.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyFixedSizedListOfDecimalsVector() {
+ try (FixedSizeListVector from = FixedSizeListVector.empty("v", 4, allocator);
+ FixedSizeListVector to = FixedSizeListVector.empty("v", 4, allocator)) {
+ from.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 0, 128)));
+ to.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 0, 128)));
+
+ DecimalHolder holder = new DecimalHolder();
+ holder.buffer = allocator.buffer(DecimalVector.TYPE_WIDTH);
+ ArrowType arrowType = new ArrowType.Decimal(3, 0, 128);
+
+ // populate from vector
+ UnionFixedSizeListWriter writer = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ writer.startList();
+ writer.decimal().writeDecimal(BigDecimal.valueOf(i));
+
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(i * 2), holder.buffer, 0, DecimalVector.TYPE_WIDTH);
+ holder.start = 0;
+ holder.scale = 0;
+ holder.precision = 3;
+ writer.decimal().write(holder);
+
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(i * 3), holder.buffer, 0, DecimalVector.TYPE_WIDTH);
+ writer.decimal().writeDecimal(0, holder.buffer, arrowType);
+
+ writer.decimal().writeBigEndianBytesToDecimal(BigDecimal.valueOf(i * 4).unscaledValue().toByteArray(),
+ arrowType);
+
+ writer.endList();
+ }
+ from.setValueCount(COUNT);
+ to.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ holder.buffer.close();
+ }
+ }
+
+ @Test
+ public void testCopyUnionListWithDecimal() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator)) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ listWriter.startList();
+
+ listWriter.decimal().writeDecimal(BigDecimal.valueOf(i * 2));
+ listWriter.integer().writeInt(i);
+ listWriter.decimal().writeBigEndianBytesToDecimal(BigDecimal.valueOf(i * 3).unscaledValue().toByteArray(),
+ new ArrowType.Decimal(3, 0, 128));
+
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ }
+ }
+
+ @Test
+ public void testCopyStructVector() {
+ try (final StructVector from = StructVector.empty("v", allocator);
+ final StructVector to = StructVector.empty("v", allocator)) {
+
+ from.allocateNewSafe();
+
+ NullableStructWriter structWriter = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.integer("int").writeInt(i);
+ structWriter.decimal("dec", 0, 38).writeDecimal(BigDecimal.valueOf(i * 2));
+ StructWriter innerStructWriter = structWriter.struct("struc");
+ innerStructWriter.start();
+ innerStructWriter.integer("innerint").writeInt(i * 3);
+ innerStructWriter.decimal("innerdec", 0, 38).writeDecimal(BigDecimal.valueOf(i * 4));
+ innerStructWriter.decimal("innerdec", 0, 38).writeBigEndianBytesToDecimal(BigDecimal.valueOf(i * 4)
+ .unscaledValue().toByteArray(), new ArrowType.Decimal(3, 0, 128));
+ innerStructWriter.end();
+ structWriter.end();
+ }
+
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyDecimalVectorWrongScale() {
+ try (FixedSizeListVector from = FixedSizeListVector.empty("v", 3, allocator);
+ FixedSizeListVector to = FixedSizeListVector.empty("v", 3, allocator)) {
+ from.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 2, 128)));
+ to.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 1, 128)));
+
+ // populate from vector
+ UnionFixedSizeListWriter writer = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ writer.startList();
+ writer.decimal().writeDecimal(BigDecimal.valueOf(1.23));
+ writer.decimal().writeDecimal(BigDecimal.valueOf(2.45));
+ writer.endList();
+ }
+ from.setValueCount(COUNT);
+ to.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ UnsupportedOperationException e = assertThrows(UnsupportedOperationException.class,
+ () -> ComplexCopier.copy(in, out));
+ assertTrue(e.getMessage().contains("BigDecimal scale must equal that in the Arrow vector: 2 != 1"));
+ }
+ }
+
+ @Test
+ public void testCopyStructVectorWithNulls() {
+ try (StructVector from = StructVector.empty("v", allocator);
+ StructVector to = StructVector.empty("v", allocator)) {
+
+ NullableStructWriter writer = from.getWriter();
+
+ for (int i = 0; i < COUNT; ++i) {
+ writer.setPosition(i);
+ writer.start();
+ writer.integer("int").writeInt(i);
+ if (i % 3 == 0) {
+ writer.float4("child").writeFloat4(12.3f);
+ } else if (i % 3 == 1) {
+ writer.integer("child").writeInt(123);
+ } else {
+ writer.integer("child").writeNull();
+ }
+ writer.end();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java
new file mode 100644
index 000000000..9dce33122
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.DirtyRootAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPromotableWriter {
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testPromoteToUnion() throws Exception {
+
+ try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator);
+ final StructVector v = container.addOrGetStruct("test");
+ final PromotableWriter writer = new PromotableWriter(v, container)) {
+
+ container.allocateNew();
+
+ writer.start();
+
+ writer.setPosition(0);
+ writer.bit("A").writeBit(0);
+
+ writer.setPosition(1);
+ writer.bit("A").writeBit(1);
+
+ writer.decimal("dec", 10, 10);
+
+ writer.setPosition(2);
+ writer.integer("A").writeInt(10);
+
+ // we don't write anything in 3
+
+ writer.setPosition(4);
+ writer.integer("A").writeInt(100);
+
+ writer.end();
+
+ container.setValueCount(5);
+
+ final UnionVector uv = v.getChild("A", UnionVector.class);
+
+ assertFalse("0 shouldn't be null", uv.isNull(0));
+ assertEquals(false, uv.getObject(0));
+
+ assertFalse("1 shouldn't be null", uv.isNull(1));
+ assertEquals(true, uv.getObject(1));
+
+ assertFalse("2 shouldn't be null", uv.isNull(2));
+ assertEquals(10, uv.getObject(2));
+
+ assertNull("3 should be null", uv.getObject(3));
+
+ assertFalse("4 shouldn't be null", uv.isNull(4));
+ assertEquals(100, uv.getObject(4));
+
+ container.clear();
+ container.allocateNew();
+
+ ComplexWriterImpl newWriter = new ComplexWriterImpl(EMPTY_SCHEMA_PATH, container);
+
+ StructWriter newStructWriter = newWriter.rootAsStruct();
+
+ newStructWriter.start();
+
+ newStructWriter.setPosition(2);
+ newStructWriter.integer("A").writeInt(10);
+
+ Field childField1 = container.getField().getChildren().get(0).getChildren().get(0);
+ Field childField2 = container.getField().getChildren().get(0).getChildren().get(1);
+ assertEquals("Child field should be union type: " +
+ childField1.getName(), ArrowTypeID.Union, childField1.getType().getTypeID());
+ assertEquals("Child field should be decimal type: " +
+ childField2.getName(), ArrowTypeID.Decimal, childField2.getType().getTypeID());
+ }
+ }
+
+ @Test
+ public void testNoPromoteToUnionWithNull() throws Exception {
+
+ try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator);
+ final StructVector v = container.addOrGetStruct("test");
+ final PromotableWriter writer = new PromotableWriter(v, container)) {
+
+ container.allocateNew();
+
+ writer.start();
+ writer.list("list").startList();
+ writer.list("list").endList();
+ writer.end();
+
+ FieldType childTypeOfListInContainer = container.getField().getChildren().get(0).getChildren().get(0)
+ .getChildren().get(0).getFieldType();
+
+
+ // create a listvector with same type as list in container to, say, hold a copy
+ // this will be a nullvector
+ ListVector lv = ListVector.empty("name", allocator);
+ lv.addOrGetVector(childTypeOfListInContainer);
+ assertEquals(childTypeOfListInContainer.getType(), Types.MinorType.NULL.getType());
+ assertEquals(lv.getChildrenFromFields().get(0).getMinorType().getType(), Types.MinorType.NULL.getType());
+
+ writer.start();
+ writer.list("list").startList();
+ writer.list("list").float4().writeFloat4(1.36f);
+ writer.list("list").endList();
+ writer.end();
+
+ container.setValueCount(2);
+
+ childTypeOfListInContainer = container.getField().getChildren().get(0).getChildren().get(0)
+ .getChildren().get(0).getFieldType();
+
+ // repeat but now the type in container has been changed from null to float
+ // we expect same behaviour from listvector
+ lv.addOrGetVector(childTypeOfListInContainer);
+ assertEquals(childTypeOfListInContainer.getType(), Types.MinorType.FLOAT4.getType());
+ assertEquals(lv.getChildrenFromFields().get(0).getMinorType().getType(), Types.MinorType.FLOAT4.getType());
+
+ lv.close();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
new file mode 100644
index 000000000..d4cf6ea89
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
@@ -0,0 +1,1335 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.writer;
+
+import static org.junit.Assert.*;
+
+import java.math.BigDecimal;
+import java.time.LocalDateTime;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SchemaChangeCallBack;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl;
+import org.apache.arrow.vector.complex.impl.SingleStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionListReader;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionReader;
+import org.apache.arrow.vector.complex.impl.UnionWriter;
+import org.apache.arrow.vector.complex.reader.BaseReader.StructReader;
+import org.apache.arrow.vector.complex.reader.BigIntReader;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.reader.Float4Reader;
+import org.apache.arrow.vector.complex.reader.Float8Reader;
+import org.apache.arrow.vector.complex.reader.IntReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.holders.DecimalHolder;
+import org.apache.arrow.vector.holders.IntHolder;
+import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.JsonStringHashMap;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestComplexWriter {
+
+ private BufferAllocator allocator;
+
+ private static final int COUNT = 100;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void simpleNestedTypes() {
+ NonNullableStructVector parent = populateStructVector(null);
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+ for (int i = 0; i < COUNT; i++) {
+ rootReader.setPosition(i);
+ Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue());
+ Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue());
+ }
+
+ parent.close();
+ }
+
+ @Test
+ public void transferPairSchemaChange() {
+ SchemaChangeCallBack callBack1 = new SchemaChangeCallBack();
+ SchemaChangeCallBack callBack2 = new SchemaChangeCallBack();
+ try (NonNullableStructVector parent = populateStructVector(callBack1)) {
+ TransferPair tp = parent.getTransferPair("newVector", allocator, callBack2);
+
+ ComplexWriter writer = new ComplexWriterImpl("newWriter", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("newInt");
+ intWriter.writeInt(1);
+ writer.setValueCount(1);
+
+ assertTrue(callBack1.getSchemaChangedAndReset());
+ // The second vector should not have registered a schema change
+ assertFalse(callBack1.getSchemaChangedAndReset());
+ }
+ }
+
+ private NonNullableStructVector populateStructVector(CallBack callBack) {
+ NonNullableStructVector parent =
+ new NonNullableStructVector("parent", allocator, new FieldType(false, Struct.INSTANCE, null, null), callBack);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ for (int i = 0; i < COUNT; i++) {
+ rootWriter.start();
+ intWriter.writeInt(i);
+ bigIntWriter.writeBigInt(i);
+ rootWriter.end();
+ }
+ writer.setValueCount(COUNT);
+ return parent;
+ }
+
+ @Test
+ public void nullableStruct() {
+ try (NonNullableStructVector structVector = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", structVector);
+ StructWriter rootWriter = writer.rootAsStruct();
+ for (int i = 0; i < COUNT; i++) {
+ rootWriter.start();
+ if (i % 2 == 0) {
+ StructWriter structWriter = rootWriter.struct("struct");
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.bigInt("nested").writeBigInt(i);
+ structWriter.end();
+ }
+ rootWriter.end();
+ }
+ writer.setValueCount(COUNT);
+ checkNullableStruct(structVector);
+ }
+ }
+
+ /**
+ * This test is similar to {@link #nullableStruct()} ()} but we get the inner struct writer once at the beginning.
+ */
+ @Test
+ public void nullableStruct2() {
+ try (NonNullableStructVector structVector = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", structVector);
+ StructWriter rootWriter = writer.rootAsStruct();
+ StructWriter structWriter = rootWriter.struct("struct");
+
+ for (int i = 0; i < COUNT; i++) {
+ rootWriter.start();
+ if (i % 2 == 0) {
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.bigInt("nested").writeBigInt(i);
+ structWriter.end();
+ }
+ rootWriter.end();
+ }
+ writer.setValueCount(COUNT);
+ checkNullableStruct(structVector);
+ }
+ }
+
+ private void checkNullableStruct(NonNullableStructVector structVector) {
+ StructReader rootReader = new SingleStructReaderImpl(structVector).reader("root");
+ for (int i = 0; i < COUNT; i++) {
+ rootReader.setPosition(i);
+ assertTrue("index is set: " + i, rootReader.isSet());
+ FieldReader struct = rootReader.reader("struct");
+ if (i % 2 == 0) {
+ assertTrue("index is set: " + i, struct.isSet());
+ assertNotNull("index is set: " + i, struct.readObject());
+ assertEquals(i, struct.reader("nested").readLong().longValue());
+ } else {
+ assertFalse("index is not set: " + i, struct.isSet());
+ assertNull("index is not set: " + i, struct.readObject());
+ }
+ }
+ }
+
+ @Test
+ public void testList() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ rootWriter.start();
+ rootWriter.bigInt("int").writeBigInt(0);
+ rootWriter.list("list").startList();
+ rootWriter.list("list").bigInt().writeBigInt(0);
+ rootWriter.list("list").endList();
+ rootWriter.end();
+
+ rootWriter.start();
+ rootWriter.bigInt("int").writeBigInt(1);
+ rootWriter.end();
+
+ writer.setValueCount(2);
+
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+
+ rootReader.setPosition(0);
+ assertTrue("row 0 list is not set", rootReader.reader("list").isSet());
+ assertEquals(Long.valueOf(0), rootReader.reader("list").reader().readLong());
+
+ rootReader.setPosition(1);
+ assertFalse("row 1 list is set", rootReader.reader("list").isSet());
+ }
+ }
+
+ @Test
+ public void listScalarType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ if (j % 2 == 0) {
+ listWriter.writeInt(j);
+ } else {
+ IntHolder holder = new IntHolder();
+ holder.value = j;
+ listWriter.write(holder);
+ }
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ assertEquals(j, listReader.reader().readInteger().intValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testListScalarNull() {
+ /* Write to a integer list vector
+ * each list of size 8 and having it's data values alternating between null and a non-null.
+ * Read and verify
+ */
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ if (j % 2 == 0) {
+ listWriter.writeNull();
+ } else {
+ IntHolder holder = new IntHolder();
+ holder.value = j;
+ listWriter.write(holder);
+ }
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ if (j % 2 == 0) {
+ assertFalse("index is set: " + j, listReader.reader().isSet());
+ } else {
+ assertTrue("index is not set: " + j, listReader.reader().isSet());
+ assertEquals(j, listReader.reader().readInteger().intValue());
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void listDecimalType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ DecimalHolder holder = new DecimalHolder();
+ holder.buffer = allocator.buffer(DecimalVector.TYPE_WIDTH);
+ ArrowType arrowType = new ArrowType.Decimal(10, 0, 128);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ if (j % 4 == 0) {
+ listWriter.writeDecimal(new BigDecimal(j));
+ } else if (j % 4 == 1) {
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH);
+ holder.start = 0;
+ holder.scale = 0;
+ holder.precision = 10;
+ listWriter.write(holder);
+ } else if (j % 4 == 2) {
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH);
+ listWriter.writeDecimal(0, holder.buffer, arrowType);
+ } else {
+ byte[] value = BigDecimal.valueOf(j).unscaledValue().toByteArray();
+ listWriter.writeBigEndianBytesToDecimal(value, arrowType);
+ }
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ Object expected = new BigDecimal(j);
+ Object actual = listReader.reader().readBigDecimal();
+ assertEquals(expected, actual);
+ }
+ }
+ holder.buffer.close();
+ }
+ }
+
+ @Test
+ public void listScalarTypeNullable() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ if (i % 2 == 0) {
+ listWriter.setPosition(i);
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ listWriter.writeInt(j);
+ }
+ listWriter.endList();
+ }
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ if (i % 2 == 0) {
+ assertTrue("index is set: " + i, listReader.isSet());
+ assertEquals("correct length at: " + i, i % 7, ((List<?>) listReader.readObject()).size());
+ } else {
+ assertFalse("index is not set: " + i, listReader.isSet());
+ assertNull("index is not set: " + i, listReader.readObject());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void listStructType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ StructWriter structWriter = listWriter.struct();
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ structWriter.start();
+ structWriter.integer("int").writeInt(j);
+ structWriter.bigInt("bigInt").writeBigInt(j);
+ structWriter.end();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ Assert.assertEquals("record: " + i, j, listReader.reader().reader("int").readInteger().intValue());
+ Assert.assertEquals(j, listReader.reader().reader("bigInt").readLong().longValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void listListType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ ListWriter innerListWriter = listWriter.list();
+ innerListWriter.startList();
+ for (int k = 0; k < i % 13; k++) {
+ innerListWriter.integer().writeInt(k);
+ }
+ innerListWriter.endList();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkListOfLists(listVector);
+ }
+ }
+
+ /**
+ * This test is similar to {@link #listListType()} but we get the inner list writer once at the beginning.
+ */
+ @Test
+ public void listListType2() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ ListWriter innerListWriter = listWriter.list();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ innerListWriter.startList();
+ for (int k = 0; k < i % 13; k++) {
+ innerListWriter.integer().writeInt(k);
+ }
+ innerListWriter.endList();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkListOfLists(listVector);
+ }
+ }
+
+ private void checkListOfLists(final ListVector listVector) {
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ FieldReader innerListReader = listReader.reader();
+ for (int k = 0; k < i % 13; k++) {
+ innerListReader.next();
+ Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void unionListListType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ ListWriter innerListWriter = listWriter.list();
+ innerListWriter.startList();
+ for (int k = 0; k < i % 13; k++) {
+ if (k % 2 == 0) {
+ innerListWriter.integer().writeInt(k);
+ } else {
+ innerListWriter.bigInt().writeBigInt(k);
+ }
+ }
+ innerListWriter.endList();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkUnionList(listVector);
+ }
+ }
+
+ /**
+ * This test is similar to {@link #unionListListType()} but we get the inner list writer once at the beginning.
+ */
+ @Test
+ public void unionListListType2() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ ListWriter innerListWriter = listWriter.list();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ innerListWriter.startList();
+ for (int k = 0; k < i % 13; k++) {
+ if (k % 2 == 0) {
+ innerListWriter.integer().writeInt(k);
+ } else {
+ innerListWriter.bigInt().writeBigInt(k);
+ }
+ }
+ innerListWriter.endList();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkUnionList(listVector);
+ }
+ }
+
+ private void checkUnionList(ListVector listVector) {
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ FieldReader innerListReader = listReader.reader();
+ for (int k = 0; k < i % 13; k++) {
+ innerListReader.next();
+ if (k % 2 == 0) {
+ Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue());
+ } else {
+ Assert.assertEquals("record: " + i, k, innerListReader.reader().readLong().longValue());
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testListMapType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ MapWriter innerMapWriter = listWriter.map(true);
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ innerMapWriter.startMap();
+ for (int k = 0; k < i % 13; k++) {
+ innerMapWriter.startEntry();
+ innerMapWriter.key().integer().writeInt(k);
+ if (k % 2 == 0) {
+ innerMapWriter.value().bigInt().writeBigInt(k);
+ }
+ innerMapWriter.endEntry();
+ }
+ innerMapWriter.endMap();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkListMap(listVector);
+
+ // Verify that the map vector has keysSorted = true
+ MapVector mapVector = (MapVector) listVector.getDataVector();
+ ArrowType arrowType = mapVector.getField().getFieldType().getType();
+ assertTrue(((ArrowType.Map) arrowType).getKeysSorted());
+ }
+ }
+
+ private void checkListMap(ListVector listVector) {
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ UnionMapReader mapReader = (UnionMapReader) listReader.reader();
+ for (int k = 0; k < i % 13; k++) {
+ mapReader.next();
+ Assert.assertEquals("record key: " + i, k, mapReader.key().readInteger().intValue());
+ if (k % 2 == 0) {
+ Assert.assertEquals("record value: " + i, k, mapReader.value().readLong().longValue());
+ } else {
+ Assert.assertNull("record value: " + i, mapReader.value().readLong());
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void simpleUnion() {
+ UnionVector vector = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ UnionWriter unionWriter = new UnionWriter(vector);
+ unionWriter.allocate();
+ for (int i = 0; i < COUNT; i++) {
+ unionWriter.setPosition(i);
+ if (i % 2 == 0) {
+ unionWriter.writeInt(i);
+ } else {
+ unionWriter.writeFloat4((float) i);
+ }
+ }
+ vector.setValueCount(COUNT);
+ UnionReader unionReader = new UnionReader(vector);
+ for (int i = 0; i < COUNT; i++) {
+ unionReader.setPosition(i);
+ if (i % 2 == 0) {
+ Assert.assertEquals(i, i, unionReader.readInteger());
+ } else {
+ Assert.assertEquals((float) i, unionReader.readFloat(), 1e-12);
+ }
+ }
+ vector.close();
+ }
+
+ @Test
+ public void promotableWriter() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ for (int i = 0; i < 100; i++) {
+ BigIntWriter bigIntWriter = rootWriter.bigInt("a");
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ }
+ Field field = parent.getField().getChildren().get(0).getChildren().get(0);
+ Assert.assertEquals("a", field.getName());
+ Assert.assertEquals(Int.TYPE_TYPE, field.getType().getTypeID());
+ Int intType = (Int) field.getType();
+
+ Assert.assertEquals(64, intType.getBitWidth());
+ Assert.assertTrue(intType.getIsSigned());
+ for (int i = 100; i < 200; i++) {
+ VarCharWriter varCharWriter = rootWriter.varChar("a");
+ varCharWriter.setPosition(i);
+ byte[] bytes = Integer.toString(i).getBytes();
+ ArrowBuf tempBuf = allocator.buffer(bytes.length);
+ tempBuf.setBytes(0, bytes);
+ varCharWriter.writeVarChar(0, bytes.length, tempBuf);
+ tempBuf.close();
+ }
+ field = parent.getField().getChildren().get(0).getChildren().get(0);
+ Assert.assertEquals("a", field.getName());
+ Assert.assertEquals(Union.TYPE_TYPE, field.getType().getTypeID());
+ Assert.assertEquals(Int.TYPE_TYPE, field.getChildren().get(0).getType().getTypeID());
+ Assert.assertEquals(Utf8.TYPE_TYPE, field.getChildren().get(1).getType().getTypeID());
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+ for (int i = 0; i < 100; i++) {
+ rootReader.setPosition(i);
+ FieldReader reader = rootReader.reader("a");
+ Long value = reader.readLong();
+ Assert.assertNotNull("index: " + i, value);
+ Assert.assertEquals(i, value.intValue());
+ }
+ for (int i = 100; i < 200; i++) {
+ rootReader.setPosition(i);
+ FieldReader reader = rootReader.reader("a");
+ Text value = reader.readText();
+ Assert.assertEquals(Integer.toString(i), value.toString());
+ }
+ }
+ }
+
+ /**
+ * Even without writing to the writer, the union schema is created correctly.
+ */
+ @Test
+ public void promotableWriterSchema() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ rootWriter.bigInt("a");
+ rootWriter.varChar("a");
+
+ Field field = parent.getField().getChildren().get(0).getChildren().get(0);
+ Assert.assertEquals("a", field.getName());
+ Assert.assertEquals(ArrowTypeID.Union, field.getType().getTypeID());
+
+ Assert.assertEquals(ArrowTypeID.Int, field.getChildren().get(0).getType().getTypeID());
+ Int intType = (Int) field.getChildren().get(0).getType();
+ Assert.assertEquals(64, intType.getBitWidth());
+ Assert.assertTrue(intType.getIsSigned());
+ Assert.assertEquals(ArrowTypeID.Utf8, field.getChildren().get(1).getType().getTypeID());
+ }
+ }
+
+ private Set<String> getFieldNames(List<Field> fields) {
+ Set<String> fieldNames = new HashSet<>();
+ for (Field field : fields) {
+ fieldNames.add(field.getName());
+ if (!field.getChildren().isEmpty()) {
+ for (String name : getFieldNames(field.getChildren())) {
+ fieldNames.add(field.getName() + "::" + name);
+ }
+ }
+ }
+ return fieldNames;
+ }
+
+ @Test
+ public void structWriterMixedCaseFieldNames() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // test case-sensitive StructWriter
+ ComplexWriter writer = new ComplexWriterImpl("rootCaseSensitive", parent, false, true);
+ StructWriter rootWriterCaseSensitive = writer.rootAsStruct();
+ rootWriterCaseSensitive.bigInt("int_field");
+ rootWriterCaseSensitive.bigInt("Int_Field");
+ rootWriterCaseSensitive.float4("float_field");
+ rootWriterCaseSensitive.float4("Float_Field");
+ StructWriter structFieldWriterCaseSensitive = rootWriterCaseSensitive.struct("struct_field");
+ structFieldWriterCaseSensitive.varChar("char_field");
+ structFieldWriterCaseSensitive.varChar("Char_Field");
+ ListWriter listFieldWriterCaseSensitive = rootWriterCaseSensitive.list("list_field");
+ StructWriter listStructFieldWriterCaseSensitive = listFieldWriterCaseSensitive.struct();
+ listStructFieldWriterCaseSensitive.bit("bit_field");
+ listStructFieldWriterCaseSensitive.bit("Bit_Field");
+
+ List<Field> fieldsCaseSensitive = parent.getField().getChildren().get(0).getChildren();
+ Set<String> fieldNamesCaseSensitive = getFieldNames(fieldsCaseSensitive);
+ Assert.assertEquals(11, fieldNamesCaseSensitive.size());
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("int_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("Int_Field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("float_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("Float_Field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::char_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::Char_Field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::Bit_Field"));
+
+ // test case-insensitive StructWriter
+ ComplexWriter writerCaseInsensitive = new ComplexWriterImpl("rootCaseInsensitive", parent, false, false);
+ StructWriter rootWriterCaseInsensitive = writerCaseInsensitive.rootAsStruct();
+
+ rootWriterCaseInsensitive.bigInt("int_field");
+ rootWriterCaseInsensitive.bigInt("Int_Field");
+ rootWriterCaseInsensitive.float4("float_field");
+ rootWriterCaseInsensitive.float4("Float_Field");
+ StructWriter structFieldWriterCaseInsensitive = rootWriterCaseInsensitive.struct("struct_field");
+ structFieldWriterCaseInsensitive.varChar("char_field");
+ structFieldWriterCaseInsensitive.varChar("Char_Field");
+ ListWriter listFieldWriterCaseInsensitive = rootWriterCaseInsensitive.list("list_field");
+ StructWriter listStructFieldWriterCaseInsensitive = listFieldWriterCaseInsensitive.struct();
+ listStructFieldWriterCaseInsensitive.bit("bit_field");
+ listStructFieldWriterCaseInsensitive.bit("Bit_Field");
+
+ List<Field> fieldsCaseInsensitive = parent.getField().getChildren().get(1).getChildren();
+ Set<String> fieldNamesCaseInsensitive = getFieldNames(fieldsCaseInsensitive);
+ Assert.assertEquals(7, fieldNamesCaseInsensitive.size());
+ Assert.assertTrue(fieldNamesCaseInsensitive.contains("int_field"));
+ Assert.assertTrue(fieldNamesCaseInsensitive.contains("float_field"));
+ Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field"));
+ Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field::char_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field"));
+ }
+ }
+
+ @Test
+ public void timeStampSecWriter() throws Exception {
+ // test values
+ final long expectedSecs = 981173106L;
+ final LocalDateTime expectedSecDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 0);
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // write
+
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ {
+ TimeStampSecWriter timeStampSecWriter = rootWriter.timeStampSec("sec");
+ timeStampSecWriter.setPosition(0);
+ timeStampSecWriter.writeTimeStampSec(expectedSecs);
+ }
+ {
+ TimeStampSecTZWriter timeStampSecTZWriter = rootWriter.timeStampSecTZ("secTZ", "UTC");
+ timeStampSecTZWriter.setPosition(1);
+ timeStampSecTZWriter.writeTimeStampSecTZ(expectedSecs);
+ }
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ checkTimestampField(children.get(0), "sec");
+ checkTimestampTZField(children.get(1), "secTZ", "UTC");
+
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+ {
+ FieldReader secReader = rootReader.reader("sec");
+ secReader.setPosition(0);
+ LocalDateTime secDateTime = secReader.readLocalDateTime();
+ Assert.assertEquals(expectedSecDateTime, secDateTime);
+ long secLong = secReader.readLong();
+ Assert.assertEquals(expectedSecs, secLong);
+ }
+ {
+ FieldReader secTZReader = rootReader.reader("secTZ");
+ secTZReader.setPosition(1);
+ long secTZLong = secTZReader.readLong();
+ Assert.assertEquals(expectedSecs, secTZLong);
+ }
+ }
+ }
+
+ @Test
+ public void timeStampMilliWriters() throws Exception {
+ // test values
+ final long expectedMillis = 981173106123L;
+ final LocalDateTime expectedMilliDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 123 * 1_000_000);
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator);) {
+ // write
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ {
+ TimeStampMilliWriter timeStampWriter = rootWriter.timeStampMilli("milli");
+ timeStampWriter.setPosition(0);
+ timeStampWriter.writeTimeStampMilli(expectedMillis);
+ }
+ String tz = "UTC";
+ {
+ TimeStampMilliTZWriter timeStampTZWriter = rootWriter.timeStampMilliTZ("milliTZ", tz);
+ timeStampTZWriter.setPosition(0);
+ timeStampTZWriter.writeTimeStampMilliTZ(expectedMillis);
+ }
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ checkTimestampField(children.get(0), "milli");
+ checkTimestampTZField(children.get(1), "milliTZ", tz);
+
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+
+ {
+ FieldReader milliReader = rootReader.reader("milli");
+ milliReader.setPosition(0);
+ LocalDateTime milliDateTime = milliReader.readLocalDateTime();
+ Assert.assertEquals(expectedMilliDateTime, milliDateTime);
+ long milliLong = milliReader.readLong();
+ Assert.assertEquals(expectedMillis, milliLong);
+ }
+ {
+ FieldReader milliTZReader = rootReader.reader("milliTZ");
+ milliTZReader.setPosition(0);
+ long milliTZLong = milliTZReader.readLong();
+ Assert.assertEquals(expectedMillis, milliTZLong);
+ }
+ }
+ }
+
+ private void checkTimestampField(Field field, String name) {
+ Assert.assertEquals(name, field.getName());
+ Assert.assertEquals(ArrowType.Timestamp.TYPE_TYPE, field.getType().getTypeID());
+ }
+
+ private void checkTimestampTZField(Field field, String name, String tz) {
+ checkTimestampField(field, name);
+ Assert.assertEquals(tz, ((Timestamp) field.getType()).getTimezone());
+ }
+
+ @Test
+ public void timeStampMicroWriters() throws Exception {
+ // test values
+ final long expectedMicros = 981173106123456L;
+ final LocalDateTime expectedMicroDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 123456 * 1000);
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // write
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ {
+ TimeStampMicroWriter timeStampMicroWriter = rootWriter.timeStampMicro("micro");
+ timeStampMicroWriter.setPosition(0);
+ timeStampMicroWriter.writeTimeStampMicro(expectedMicros);
+ }
+ String tz = "UTC";
+ {
+ TimeStampMicroTZWriter timeStampMicroWriter = rootWriter.timeStampMicroTZ("microTZ", tz);
+ timeStampMicroWriter.setPosition(1);
+ timeStampMicroWriter.writeTimeStampMicroTZ(expectedMicros);
+ }
+
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ checkTimestampField(children.get(0), "micro");
+ checkTimestampTZField(children.get(1), "microTZ", tz);
+
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+ {
+ FieldReader microReader = rootReader.reader("micro");
+ microReader.setPosition(0);
+ LocalDateTime microDateTime = microReader.readLocalDateTime();
+ Assert.assertEquals(expectedMicroDateTime, microDateTime);
+ long microLong = microReader.readLong();
+ Assert.assertEquals(expectedMicros, microLong);
+ }
+ {
+ FieldReader microReader = rootReader.reader("microTZ");
+ microReader.setPosition(1);
+ long microLong = microReader.readLong();
+ Assert.assertEquals(expectedMicros, microLong);
+ }
+ }
+ }
+
+ @Test
+ public void timeStampNanoWriters() throws Exception {
+ // test values
+ final long expectedNanos = 981173106123456789L;
+ final LocalDateTime expectedNanoDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 123456789);
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // write
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ {
+ TimeStampNanoWriter timeStampNanoWriter = rootWriter.timeStampNano("nano");
+ timeStampNanoWriter.setPosition(0);
+ timeStampNanoWriter.writeTimeStampNano(expectedNanos);
+ }
+ String tz = "UTC";
+ {
+ TimeStampNanoTZWriter timeStampNanoWriter = rootWriter.timeStampNanoTZ("nanoTZ", tz);
+ timeStampNanoWriter.setPosition(0);
+ timeStampNanoWriter.writeTimeStampNanoTZ(expectedNanos);
+ }
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ checkTimestampField(children.get(0), "nano");
+ checkTimestampTZField(children.get(1), "nanoTZ", tz);
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+
+ {
+ FieldReader nanoReader = rootReader.reader("nano");
+ nanoReader.setPosition(0);
+ LocalDateTime nanoDateTime = nanoReader.readLocalDateTime();
+ Assert.assertEquals(expectedNanoDateTime, nanoDateTime);
+ long nanoLong = nanoReader.readLong();
+ Assert.assertEquals(expectedNanos, nanoLong);
+ }
+ {
+ FieldReader nanoReader = rootReader.reader("nanoTZ");
+ nanoReader.setPosition(0);
+ long nanoLong = nanoReader.readLong();
+ Assert.assertEquals(expectedNanos, nanoLong);
+ NullableTimeStampNanoTZHolder h = new NullableTimeStampNanoTZHolder();
+ nanoReader.read(h);
+ Assert.assertEquals(expectedNanos, h.value);
+ }
+ }
+
+ }
+
+ @Test
+ public void fixedSizeBinaryWriters() throws Exception {
+ // test values
+ int numValues = 10;
+ int byteWidth = 9;
+ byte[][] values = new byte[numValues][byteWidth];
+ for (int i = 0; i < numValues; i++) {
+ for (int j = 0; j < byteWidth; j++) {
+ values[i][j] = ((byte) i);
+ }
+ }
+ ArrowBuf[] bufs = new ArrowBuf[numValues];
+ for (int i = 0; i < numValues; i++) {
+ bufs[i] = allocator.buffer(byteWidth);
+ bufs[i].setBytes(0, values[i]);
+ }
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // write
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ String fieldName = "fixedSizeBinary";
+ FixedSizeBinaryWriter fixedSizeBinaryWriter = rootWriter.fixedSizeBinary(fieldName, byteWidth);
+ for (int i = 0; i < numValues; i++) {
+ fixedSizeBinaryWriter.setPosition(i);
+ fixedSizeBinaryWriter.writeFixedSizeBinary(bufs[i]);
+ }
+
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ Assert.assertEquals(fieldName, children.get(0).getName());
+ Assert.assertEquals(ArrowType.FixedSizeBinary.TYPE_TYPE, children.get(0).getType().getTypeID());
+
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+
+ FieldReader fixedSizeBinaryReader = rootReader.reader(fieldName);
+ for (int i = 0; i < numValues; i++) {
+ fixedSizeBinaryReader.setPosition(i);
+ byte[] readValues = fixedSizeBinaryReader.readByteArray();
+ Assert.assertArrayEquals(values[i], readValues);
+ }
+ }
+
+ AutoCloseables.close(bufs);
+ }
+
+ @Test
+ public void complexCopierWithList() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ ListWriter listWriter = rootWriter.list("list");
+ StructWriter innerStructWriter = listWriter.struct();
+ IntWriter outerIntWriter = listWriter.integer();
+ rootWriter.start();
+ listWriter.startList();
+ outerIntWriter.writeInt(1);
+ outerIntWriter.writeInt(2);
+ innerStructWriter.start();
+ IntWriter intWriter = innerStructWriter.integer("a");
+ intWriter.writeInt(1);
+ innerStructWriter.end();
+ innerStructWriter.start();
+ intWriter = innerStructWriter.integer("a");
+ intWriter.writeInt(2);
+ innerStructWriter.end();
+ listWriter.endList();
+ rootWriter.end();
+ writer.setValueCount(1);
+
+ StructVector structVector = (StructVector) parent.getChild("root");
+ TransferPair tp = structVector.getTransferPair(allocator);
+ tp.splitAndTransfer(0, 1);
+ NonNullableStructVector toStructVector = (NonNullableStructVector) tp.getTo();
+ JsonStringHashMap<?, ?> toMapValue = (JsonStringHashMap<?, ?>) toStructVector.getObject(0);
+ JsonStringArrayList<?> object = (JsonStringArrayList<?>) toMapValue.get("list");
+ assertEquals(1, object.get(0));
+ assertEquals(2, object.get(1));
+ JsonStringHashMap<?, ?> innerStruct = (JsonStringHashMap<?, ?>) object.get(2);
+ assertEquals(1, innerStruct.get("a"));
+ innerStruct = (JsonStringHashMap<?, ?>) object.get(3);
+ assertEquals(2, innerStruct.get("a"));
+ toStructVector.close();
+ }
+ }
+
+ @Test
+ public void testSingleStructWriter1() {
+ /* initialize a SingleStructWriter with empty StructVector and then lazily
+ * create all vectors with expected initialCapacity.
+ */
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ SingleStructWriter singleStructWriter = new SingleStructWriter(parent);
+
+ int initialCapacity = 1024;
+ singleStructWriter.setInitialCapacity(initialCapacity);
+
+ IntWriter intWriter = singleStructWriter.integer("intField");
+ BigIntWriter bigIntWriter = singleStructWriter.bigInt("bigIntField");
+ Float4Writer float4Writer = singleStructWriter.float4("float4Field");
+ Float8Writer float8Writer = singleStructWriter.float8("float8Field");
+ ListWriter listWriter = singleStructWriter.list("listField");
+ MapWriter mapWriter = singleStructWriter.map("mapField", false);
+
+ int intValue = 100;
+ long bigIntValue = 10000;
+ float float4Value = 100.5f;
+ double float8Value = 100.375;
+
+ for (int i = 0; i < initialCapacity; i++) {
+ singleStructWriter.start();
+
+ intWriter.writeInt(intValue + i);
+ bigIntWriter.writeBigInt(bigIntValue + (long) i);
+ float4Writer.writeFloat4(float4Value + (float) i);
+ float8Writer.writeFloat8(float8Value + (double) i);
+
+ listWriter.setPosition(i);
+ listWriter.startList();
+ listWriter.integer().writeInt(intValue + i);
+ listWriter.integer().writeInt(intValue + i + 1);
+ listWriter.integer().writeInt(intValue + i + 2);
+ listWriter.integer().writeInt(intValue + i + 3);
+ listWriter.endList();
+
+ mapWriter.setPosition(i);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().integer().writeInt(intValue + i);
+ mapWriter.value().integer().writeInt(intValue + i + 1);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().integer().writeInt(intValue + i + 2);
+ mapWriter.value().integer().writeInt(intValue + i + 3);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ singleStructWriter.end();
+ }
+
+ IntVector intVector = (IntVector) parent.getChild("intField");
+ BigIntVector bigIntVector = (BigIntVector) parent.getChild("bigIntField");
+ Float4Vector float4Vector = (Float4Vector) parent.getChild("float4Field");
+ Float8Vector float8Vector = (Float8Vector) parent.getChild("float8Field");
+
+ int capacity = singleStructWriter.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+ capacity = intVector.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+ capacity = bigIntVector.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+ capacity = float4Vector.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+ capacity = float8Vector.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+
+ StructReader singleStructReader = new SingleStructReaderImpl(parent);
+
+ IntReader intReader = singleStructReader.reader("intField");
+ BigIntReader bigIntReader = singleStructReader.reader("bigIntField");
+ Float4Reader float4Reader = singleStructReader.reader("float4Field");
+ Float8Reader float8Reader = singleStructReader.reader("float8Field");
+ UnionListReader listReader = (UnionListReader) singleStructReader.reader("listField");
+ UnionMapReader mapReader = (UnionMapReader) singleStructReader.reader("mapField");
+
+ for (int i = 0; i < initialCapacity; i++) {
+ intReader.setPosition(i);
+ bigIntReader.setPosition(i);
+ float4Reader.setPosition(i);
+ float8Reader.setPosition(i);
+ listReader.setPosition(i);
+ mapReader.setPosition(i);
+
+ assertEquals(intValue + i, intReader.readInteger().intValue());
+ assertEquals(bigIntValue + (long) i, bigIntReader.readLong().longValue());
+ assertEquals(float4Value + (float) i, float4Reader.readFloat().floatValue(), 0);
+ assertEquals(float8Value + (double) i, float8Reader.readDouble().doubleValue(), 0);
+
+ for (int j = 0; j < 4; j++) {
+ listReader.next();
+ assertEquals(intValue + i + j, listReader.reader().readInteger().intValue());
+ }
+
+ for (int k = 0; k < 4; k += 2) {
+ mapReader.next();
+ assertEquals(intValue + k + i, mapReader.key().readInteger().intValue());
+ assertEquals(intValue + k + i + 1, mapReader.value().readInteger().intValue());
+ }
+ }
+ }
+
+
+ }
+
+ @Test
+ public void testListWriterWithNulls() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.setInitialCapacity(COUNT);
+ listVector.allocateNew();
+ listVector.getValidityBuffer().setOne(0, (int) listVector.getValidityBuffer().capacity());
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ // expected listVector : [[null], null, [2, 4], null, [null], null, [6, 12], ...]
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.startList();
+ if (i % 4 == 0) {
+ listWriter.integer().writeNull();
+ } else {
+ listWriter.integer().writeInt(i);
+ listWriter.integer().writeInt(i * 2);
+ }
+ listWriter.endList();
+ } else {
+ listWriter.writeNull();
+ }
+ }
+ listVector.setValueCount(COUNT);
+
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ if (i % 2 == 0) {
+ Assert.assertTrue(listReader.isSet());
+ listReader.next();
+ if (i % 4 == 0) {
+ Assert.assertNull(listReader.reader().readInteger());
+ } else {
+ Assert.assertEquals(i, listReader.reader().readInteger().intValue());
+ listReader.next();
+ Assert.assertEquals(i * 2, listReader.reader().readInteger().intValue());
+ }
+ } else {
+ Assert.assertFalse(listReader.isSet());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testListOfListWriterWithNulls() {
+ try (ListVector listVector = ListVector.empty("listoflist", allocator)) {
+ listVector.setInitialCapacity(COUNT);
+ listVector.allocateNew();
+ listVector.getValidityBuffer().setOne(0, (int) listVector.getValidityBuffer().capacity());
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ // create list : [ [null], null, [[null, 2, 4]], null, [null], null, [[null, 6, 12]], ... ]
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.startList();
+ if (i % 4 == 0) {
+ listWriter.list().writeNull();
+ } else {
+ listWriter.list().startList();
+ listWriter.list().integer().writeNull();
+ listWriter.list().integer().writeInt(i);
+ listWriter.list().integer().writeInt(i * 2);
+ listWriter.list().endList();
+ }
+ listWriter.endList();
+ } else {
+ listWriter.writeNull();
+ }
+ }
+ listVector.setValueCount(COUNT);
+
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ if (i % 2 == 0) {
+ Assert.assertTrue(listReader.isSet());
+ listReader.next();
+ if (i % 4 == 0) {
+ Assert.assertFalse(listReader.reader().isSet());
+ } else {
+ listReader.reader().next();
+ Assert.assertFalse(listReader.reader().reader().isSet());
+ listReader.reader().next();
+ Assert.assertEquals(i, listReader.reader().reader().readInteger().intValue());
+ listReader.reader().next();
+ Assert.assertEquals(i * 2, listReader.reader().reader().readInteger().intValue());
+ }
+ } else {
+ Assert.assertFalse(listReader.isSet());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testListOfListOfListWriterWithNulls() {
+ try (ListVector listVector = ListVector.empty("listoflistoflist", allocator)) {
+ listVector.setInitialCapacity(COUNT);
+ listVector.allocateNew();
+ listVector.getValidityBuffer().setOne(0, (int) listVector.getValidityBuffer().capacity());
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ // create list : [ null, [null], [[null]], [[[null, 1, 2]]], null, [null], ...
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 4 == 0) {
+ listWriter.writeNull();
+ } else {
+ listWriter.startList();
+ if (i % 4 == 1) {
+ listWriter.list().writeNull();
+ } else if (i % 4 == 2) {
+ listWriter.list().startList();
+ listWriter.list().list().writeNull();
+ listWriter.list().endList();
+ } else {
+ listWriter.list().startList();
+ listWriter.list().list().startList();
+ listWriter.list().list().integer().writeNull();
+ listWriter.list().list().integer().writeInt(i);
+ listWriter.list().list().integer().writeInt(i * 2);
+ listWriter.list().list().endList();
+ listWriter.list().endList();
+ }
+ listWriter.endList();
+ }
+ }
+ listVector.setValueCount(COUNT);
+
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ if (i % 4 == 0) {
+ Assert.assertFalse(listReader.isSet());
+ } else {
+ Assert.assertTrue(listReader.isSet());
+ listReader.next();
+ if (i % 4 == 1) {
+ Assert.assertFalse(listReader.reader().isSet());
+ } else if (i % 4 == 2) {
+ listReader.reader().next();
+ Assert.assertFalse(listReader.reader().reader().isSet());
+ } else {
+ listReader.reader().next();
+ listReader.reader().reader().next();
+ Assert.assertFalse(listReader.reader().reader().reader().isSet());
+ listReader.reader().reader().next();
+ Assert.assertEquals(i, listReader.reader().reader().reader().readInteger().intValue());
+ listReader.reader().reader().next();
+ Assert.assertEquals(i * 2, listReader.reader().reader().reader().readInteger().intValue());
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
new file mode 100644
index 000000000..8663c0c49
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
@@ -0,0 +1,849 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.charset.StandardCharsets;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionMapWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.DateMilliWriter;
+import org.apache.arrow.vector.complex.writer.Float4Writer;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.complex.writer.TimeMilliWriter;
+import org.apache.arrow.vector.complex.writer.TimeStampMilliTZWriter;
+import org.apache.arrow.vector.complex.writer.TimeStampMilliWriter;
+import org.apache.arrow.vector.complex.writer.TimeStampNanoWriter;
+import org.apache.arrow.vector.complex.writer.UInt1Writer;
+import org.apache.arrow.vector.complex.writer.UInt2Writer;
+import org.apache.arrow.vector.complex.writer.UInt4Writer;
+import org.apache.arrow.vector.complex.writer.UInt8Writer;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.Text;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Helps testing the file formats.
+ */
+public class BaseFileTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(BaseFileTest.class);
+ protected static final int COUNT = 10;
+ protected BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void tearDown() {
+ allocator.close();
+ }
+
+
+ private static short [] uint1Values = new short[]{0, 255, 1, 128, 2};
+ private static char [] uint2Values = new char[]{0, Character.MAX_VALUE, 1, Short.MAX_VALUE * 2, 2};
+ private static long [] uint4Values = new long[]{0, Integer.MAX_VALUE + 1L, 1, Integer.MAX_VALUE * 2L, 2};
+ private static BigInteger[] uint8Values = new BigInteger[]{BigInteger.valueOf(0),
+ BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.valueOf(2)), BigInteger.valueOf(2),
+ BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.valueOf(1)), BigInteger.valueOf(2)};
+
+ protected void writeData(int count, StructVector parent) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ UInt1Writer uint1Writer = rootWriter.uInt1("uint1");
+ UInt2Writer uint2Writer = rootWriter.uInt2("uint2");
+ UInt4Writer uint4Writer = rootWriter.uInt4("uint4");
+ UInt8Writer uint8Writer = rootWriter.uInt8("uint8");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ Float4Writer float4Writer = rootWriter.float4("float");
+ for (int i = 0; i < count; i++) {
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ uint1Writer.setPosition(i);
+ // TODO: Fix add safe write methods on uint methods.
+ uint1Writer.setPosition(i);
+ uint1Writer.writeUInt1((byte) uint1Values[i % uint1Values.length] );
+ uint2Writer.setPosition(i);
+ uint2Writer.writeUInt2((char) uint2Values[i % uint2Values.length] );
+ uint4Writer.setPosition(i);
+ uint4Writer.writeUInt4((int) uint4Values[i % uint4Values.length] );
+ uint8Writer.setPosition(i);
+ uint8Writer.writeUInt8(uint8Values[i % uint8Values.length].longValue());
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ float4Writer.setPosition(i);
+ float4Writer.writeFloat4(i == 0 ? Float.NaN : i);
+ }
+ writer.setValueCount(count);
+ }
+
+
+ protected void validateContent(int count, VectorSchemaRoot root) {
+ for (int i = 0; i < count; i++) {
+ Assert.assertEquals(i, root.getVector("int").getObject(i));
+ Assert.assertEquals((Short) uint1Values[i % uint1Values.length],
+ ((UInt1Vector) root.getVector("uint1")).getObjectNoOverflow(i));
+ Assert.assertEquals("Failed for index: " + i, (Character) uint2Values[i % uint2Values.length],
+ (Character) ((UInt2Vector) root.getVector("uint2")).get(i));
+ Assert.assertEquals("Failed for index: " + i, (Long) uint4Values[i % uint4Values.length],
+ ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i));
+ Assert.assertEquals("Failed for index: " + i, uint8Values[i % uint8Values.length],
+ ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i));
+ Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
+ Assert.assertEquals(i == 0 ? Float.NaN : i, root.getVector("float").getObject(i));
+ }
+ }
+
+ protected void writeComplexData(int count, StructVector parent) {
+ ArrowBuf varchar = allocator.buffer(3);
+ varchar.readerIndex(0);
+ varchar.setByte(0, 'a');
+ varchar.setByte(1, 'b');
+ varchar.setByte(2, 'c');
+ varchar.writerIndex(3);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ ListWriter listWriter = rootWriter.list("list");
+ StructWriter structWriter = rootWriter.struct("struct");
+ for (int i = 0; i < count; i++) {
+ if (i % 5 != 3) {
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ }
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ listWriter.setPosition(i);
+ listWriter.startList();
+ for (int j = 0; j < i % 3; j++) {
+ listWriter.varChar().writeVarChar(0, 3, varchar);
+ }
+ listWriter.endList();
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.timeStampMilli("timestamp").writeTimeStampMilli(i);
+ structWriter.end();
+ }
+ writer.setValueCount(count);
+ varchar.getReferenceManager().release();
+ }
+
+ public void printVectors(List<FieldVector> vectors) {
+ for (FieldVector vector : vectors) {
+ LOGGER.debug(vector.getField().getName());
+ int valueCount = vector.getValueCount();
+ for (int i = 0; i < valueCount; i++) {
+ LOGGER.debug(String.valueOf(vector.getObject(i)));
+ }
+ }
+ }
+
+ protected void validateComplexContent(int count, VectorSchemaRoot root) {
+ Assert.assertEquals(count, root.getRowCount());
+ printVectors(root.getFieldVectors());
+ for (int i = 0; i < count; i++) {
+
+ Object intVal = root.getVector("int").getObject(i);
+ if (i % 5 != 3) {
+ Assert.assertEquals(i, intVal);
+ } else {
+ Assert.assertNull(intVal);
+ }
+ Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
+ Assert.assertEquals(i % 3, ((List<?>) root.getVector("list").getObject(i)).size());
+ NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder();
+ FieldReader structReader = root.getVector("struct").getReader();
+ structReader.setPosition(i);
+ structReader.reader("timestamp").read(h);
+ Assert.assertEquals(i, h.value);
+ }
+ }
+
+ private LocalDateTime makeDateTimeFromCount(int i) {
+ return LocalDateTime.of(2000 + i, 1 + i, 1 + i, i, i, i, i * 100_000_000 + i);
+ }
+
+ protected void writeDateTimeData(int count, StructVector parent) {
+ Assert.assertTrue(count < 100);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ DateMilliWriter dateWriter = rootWriter.dateMilli("date");
+ TimeMilliWriter timeWriter = rootWriter.timeMilli("time");
+ TimeStampMilliWriter timeStampMilliWriter = rootWriter.timeStampMilli("timestamp-milli");
+ TimeStampMilliTZWriter timeStampMilliTZWriter = rootWriter.timeStampMilliTZ("timestamp-milliTZ", "Europe/Paris");
+ TimeStampNanoWriter timeStampNanoWriter = rootWriter.timeStampNano("timestamp-nano");
+ for (int i = 0; i < count; i++) {
+ LocalDateTime dt = makeDateTimeFromCount(i);
+ // Number of days in milliseconds since epoch, stored as 64-bit integer, only date part is used
+ dateWriter.setPosition(i);
+ long dateLong = dt.toLocalDate().atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
+ dateWriter.writeDateMilli(dateLong);
+ // Time is a value in milliseconds since midnight, stored as 32-bit integer
+ timeWriter.setPosition(i);
+ int milliOfDay = (int) java.util.concurrent.TimeUnit.NANOSECONDS.toMillis(dt.toLocalTime().toNanoOfDay());
+ timeWriter.writeTimeMilli(milliOfDay);
+ // Timestamp as milliseconds since the epoch, stored as 64-bit integer
+ timeStampMilliWriter.setPosition(i);
+ timeStampMilliWriter.writeTimeStampMilli(dt.toInstant(ZoneOffset.UTC).toEpochMilli());
+ // Timestamp as milliseconds since epoch with timezone
+ timeStampMilliTZWriter.setPosition(i);
+ timeStampMilliTZWriter.writeTimeStampMilliTZ(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli());
+ // Timestamp as nanoseconds since epoch
+ timeStampNanoWriter.setPosition(i);
+ long tsNanos = dt.toInstant(ZoneOffset.UTC).toEpochMilli() * 1_000_000 + i; // need to add back in nano val
+ timeStampNanoWriter.writeTimeStampNano(tsNanos);
+ }
+ writer.setValueCount(count);
+ }
+
+ protected void validateDateTimeContent(int count, VectorSchemaRoot root) {
+ Assert.assertEquals(count, root.getRowCount());
+ printVectors(root.getFieldVectors());
+ for (int i = 0; i < count; i++) {
+ LocalDateTime dt = makeDateTimeFromCount(i);
+ LocalDateTime dtMilli = dt.minusNanos(i);
+ LocalDateTime dateVal = ((DateMilliVector) root.getVector("date")).getObject(i);
+ LocalDateTime dateExpected = dt.toLocalDate().atStartOfDay();
+ Assert.assertEquals(dateExpected, dateVal);
+ LocalTime timeVal = ((TimeMilliVector) root.getVector("time")).getObject(i).toLocalTime();
+ Assert.assertEquals(dtMilli.toLocalTime(), timeVal);
+ Object timestampMilliVal = root.getVector("timestamp-milli").getObject(i);
+ Assert.assertEquals(dtMilli, timestampMilliVal);
+ Object timestampMilliTZVal = root.getVector("timestamp-milliTZ").getObject(i);
+ Assert.assertEquals(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal);
+ Object timestampNanoVal = root.getVector("timestamp-nano").getObject(i);
+ Assert.assertEquals(dt, timestampNanoVal);
+ }
+ }
+
+ protected VectorSchemaRoot writeFlatDictionaryData(
+ BufferAllocator bufferAllocator,
+ DictionaryProvider.MapDictionaryProvider provider) {
+
+ // Define dictionaries and add to provider
+ VarCharVector dictionary1Vector = newVarCharVector("D1", bufferAllocator);
+ dictionary1Vector.allocateNewSafe();
+ dictionary1Vector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ dictionary1Vector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ dictionary1Vector.set(2, "baz".getBytes(StandardCharsets.UTF_8));
+ dictionary1Vector.setValueCount(3);
+
+ Dictionary dictionary1 = new Dictionary(dictionary1Vector, new DictionaryEncoding(1L, false, null));
+ provider.put(dictionary1);
+
+ VarCharVector dictionary2Vector = newVarCharVector("D2", bufferAllocator);
+ dictionary2Vector.allocateNewSafe();
+ dictionary2Vector.set(0, "micro".getBytes(StandardCharsets.UTF_8));
+ dictionary2Vector.set(1, "small".getBytes(StandardCharsets.UTF_8));
+ dictionary2Vector.set(2, "large".getBytes(StandardCharsets.UTF_8));
+ dictionary2Vector.setValueCount(3);
+
+ Dictionary dictionary2 = new Dictionary(dictionary2Vector, new DictionaryEncoding(2L, false, null));
+ provider.put(dictionary2);
+
+ // Populate the vectors
+ VarCharVector vector1A = newVarCharVector("varcharA", bufferAllocator);
+ vector1A.allocateNewSafe();
+ vector1A.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector1A.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1A.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1A.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1A.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1A.setValueCount(6);
+
+ FieldVector encodedVector1A = (FieldVector) DictionaryEncoder.encode(vector1A, dictionary1);
+ vector1A.close(); // Done with this vector after encoding
+
+ // Write this vector using indices instead of encoding
+ IntVector encodedVector1B = new IntVector("varcharB", bufferAllocator);
+ encodedVector1B.allocateNewSafe();
+ encodedVector1B.set(0, 2); // "baz"
+ encodedVector1B.set(1, 1); // "bar"
+ encodedVector1B.set(2, 2); // "baz"
+ encodedVector1B.set(4, 1); // "bar"
+ encodedVector1B.set(5, 0); // "foo"
+ encodedVector1B.setValueCount(6);
+
+ VarCharVector vector2 = newVarCharVector("sizes", bufferAllocator);
+ vector2.allocateNewSafe();
+ vector2.set(1, "large".getBytes(StandardCharsets.UTF_8));
+ vector2.set(2, "small".getBytes(StandardCharsets.UTF_8));
+ vector2.set(3, "small".getBytes(StandardCharsets.UTF_8));
+ vector2.set(4, "large".getBytes(StandardCharsets.UTF_8));
+ vector2.setValueCount(6);
+
+ FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary2);
+ vector2.close(); // Done with this vector after encoding
+
+ List<Field> fields = Arrays.asList(encodedVector1A.getField(), encodedVector1B.getField(),
+ encodedVector2.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVector1A, encodedVector1B, encodedVector2);
+
+ return new VectorSchemaRoot(fields, vectors, encodedVector1A.getValueCount());
+ }
+
+ protected void validateFlatDictionary(VectorSchemaRoot root, DictionaryProvider provider) {
+ FieldVector vector1A = root.getVector("varcharA");
+ Assert.assertNotNull(vector1A);
+
+ DictionaryEncoding encoding1A = vector1A.getField().getDictionary();
+ Assert.assertNotNull(encoding1A);
+ Assert.assertEquals(1L, encoding1A.getId());
+
+ Assert.assertEquals(6, vector1A.getValueCount());
+ Assert.assertEquals(0, vector1A.getObject(0));
+ Assert.assertEquals(1, vector1A.getObject(1));
+ Assert.assertEquals(null, vector1A.getObject(2));
+ Assert.assertEquals(2, vector1A.getObject(3));
+ Assert.assertEquals(1, vector1A.getObject(4));
+ Assert.assertEquals(2, vector1A.getObject(5));
+
+ FieldVector vector1B = root.getVector("varcharB");
+ Assert.assertNotNull(vector1B);
+
+ DictionaryEncoding encoding1B = vector1A.getField().getDictionary();
+ Assert.assertNotNull(encoding1B);
+ Assert.assertTrue(encoding1A.equals(encoding1B));
+ Assert.assertEquals(1L, encoding1B.getId());
+
+ Assert.assertEquals(6, vector1B.getValueCount());
+ Assert.assertEquals(2, vector1B.getObject(0));
+ Assert.assertEquals(1, vector1B.getObject(1));
+ Assert.assertEquals(2, vector1B.getObject(2));
+ Assert.assertEquals(null, vector1B.getObject(3));
+ Assert.assertEquals(1, vector1B.getObject(4));
+ Assert.assertEquals(0, vector1B.getObject(5));
+
+ FieldVector vector2 = root.getVector("sizes");
+ Assert.assertNotNull(vector2);
+
+ DictionaryEncoding encoding2 = vector2.getField().getDictionary();
+ Assert.assertNotNull(encoding2);
+ Assert.assertEquals(2L, encoding2.getId());
+
+ Assert.assertEquals(6, vector2.getValueCount());
+ Assert.assertEquals(null, vector2.getObject(0));
+ Assert.assertEquals(2, vector2.getObject(1));
+ Assert.assertEquals(1, vector2.getObject(2));
+ Assert.assertEquals(1, vector2.getObject(3));
+ Assert.assertEquals(2, vector2.getObject(4));
+ Assert.assertEquals(null, vector2.getObject(5));
+
+ Dictionary dictionary1 = provider.lookup(1L);
+ Assert.assertNotNull(dictionary1);
+ VarCharVector dictionaryVector = ((VarCharVector) dictionary1.getVector());
+ Assert.assertEquals(3, dictionaryVector.getValueCount());
+ Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
+ Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
+ Assert.assertEquals(new Text("baz"), dictionaryVector.getObject(2));
+
+ Dictionary dictionary2 = provider.lookup(2L);
+ Assert.assertNotNull(dictionary2);
+ dictionaryVector = ((VarCharVector) dictionary2.getVector());
+ Assert.assertEquals(3, dictionaryVector.getValueCount());
+ Assert.assertEquals(new Text("micro"), dictionaryVector.getObject(0));
+ Assert.assertEquals(new Text("small"), dictionaryVector.getObject(1));
+ Assert.assertEquals(new Text("large"), dictionaryVector.getObject(2));
+ }
+
+ protected VectorSchemaRoot writeNestedDictionaryData(
+ BufferAllocator bufferAllocator,
+ DictionaryProvider.MapDictionaryProvider provider) {
+
+ // Define the dictionary and add to the provider
+ VarCharVector dictionaryVector = newVarCharVector("D2", bufferAllocator);
+ dictionaryVector.allocateNewSafe();
+ dictionaryVector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ dictionaryVector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ dictionaryVector.setValueCount(2);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(2L, false, null));
+ provider.put(dictionary);
+
+ // Write the vector data using dictionary indices
+ ListVector listVector = ListVector.empty("list", bufferAllocator);
+ DictionaryEncoding encoding = dictionary.getEncoding();
+ listVector.addOrGetVector(new FieldType(true, encoding.getIndexType(), encoding));
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ listWriter.startList();
+ listWriter.writeInt(0);
+ listWriter.writeInt(1);
+ listWriter.endList();
+ listWriter.startList();
+ listWriter.writeInt(0);
+ listWriter.endList();
+ listWriter.startList();
+ listWriter.writeInt(1);
+ listWriter.endList();
+ listWriter.setValueCount(3);
+
+ List<Field> fields = Collections2.asImmutableList(listVector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(listVector);
+ return new VectorSchemaRoot(fields, vectors, 3);
+ }
+
+ protected void validateNestedDictionary(VectorSchemaRoot root, DictionaryProvider provider) {
+ FieldVector vector = root.getFieldVectors().get(0);
+ Assert.assertNotNull(vector);
+ Assert.assertNull(vector.getField().getDictionary());
+ Field nestedField = vector.getField().getChildren().get(0);
+
+ DictionaryEncoding encoding = nestedField.getDictionary();
+ Assert.assertNotNull(encoding);
+ Assert.assertEquals(2L, encoding.getId());
+ Assert.assertEquals(new ArrowType.Int(32, true), encoding.getIndexType());
+
+ Assert.assertEquals(3, vector.getValueCount());
+ Assert.assertEquals(Arrays.asList(0, 1), vector.getObject(0));
+ Assert.assertEquals(Arrays.asList(0), vector.getObject(1));
+ Assert.assertEquals(Arrays.asList(1), vector.getObject(2));
+
+ Dictionary dictionary = provider.lookup(2L);
+ Assert.assertNotNull(dictionary);
+ VarCharVector dictionaryVector = ((VarCharVector) dictionary.getVector());
+ Assert.assertEquals(2, dictionaryVector.getValueCount());
+ Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
+ Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
+ }
+
+ protected VectorSchemaRoot writeDecimalData(BufferAllocator bufferAllocator) {
+ DecimalVector decimalVector1 = new DecimalVector("decimal1", bufferAllocator, 10, 3);
+ DecimalVector decimalVector2 = new DecimalVector("decimal2", bufferAllocator, 4, 2);
+ DecimalVector decimalVector3 = new DecimalVector("decimal3", bufferAllocator, 16, 8);
+
+ int count = 10;
+ decimalVector1.allocateNew(count);
+ decimalVector2.allocateNew(count);
+ decimalVector3.allocateNew(count);
+
+ for (int i = 0; i < count; i++) {
+ decimalVector1.setSafe(i, new BigDecimal(BigInteger.valueOf(i), 3));
+ decimalVector2.setSafe(i, new BigDecimal(BigInteger.valueOf(i * (1 << 10)), 2));
+ decimalVector3.setSafe(i, new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), 8));
+ }
+
+ decimalVector1.setValueCount(count);
+ decimalVector2.setValueCount(count);
+ decimalVector3.setValueCount(count);
+
+ List<Field> fields = Collections2.asImmutableList(decimalVector1.getField(), decimalVector2.getField(),
+ decimalVector3.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(decimalVector1, decimalVector2, decimalVector3);
+ return new VectorSchemaRoot(fields, vectors, count);
+ }
+
+ protected void validateDecimalData(VectorSchemaRoot root) {
+ DecimalVector decimalVector1 = (DecimalVector) root.getVector("decimal1");
+ DecimalVector decimalVector2 = (DecimalVector) root.getVector("decimal2");
+ DecimalVector decimalVector3 = (DecimalVector) root.getVector("decimal3");
+ int count = 10;
+ Assert.assertEquals(count, root.getRowCount());
+
+ for (int i = 0; i < count; i++) {
+ // Verify decimal 1 vector
+ BigDecimal readValue = decimalVector1.getObject(i);
+ ArrowType.Decimal type = (ArrowType.Decimal) decimalVector1.getField().getType();
+ BigDecimal genValue = new BigDecimal(BigInteger.valueOf(i), type.getScale());
+ Assert.assertEquals(genValue, readValue);
+
+ // Verify decimal 2 vector
+ readValue = decimalVector2.getObject(i);
+ type = (ArrowType.Decimal) decimalVector2.getField().getType();
+ genValue = new BigDecimal(BigInteger.valueOf(i * (1 << 10)), type.getScale());
+ Assert.assertEquals(genValue, readValue);
+
+ // Verify decimal 3 vector
+ readValue = decimalVector3.getObject(i);
+ type = (ArrowType.Decimal) decimalVector3.getField().getType();
+ genValue = new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), type.getScale());
+ Assert.assertEquals(genValue, readValue);
+ }
+ }
+
+ protected VectorSchemaRoot writeNullData(int valueCount) {
+ NullVector nullVector1 = new NullVector("vector1");
+ NullVector nullVector2 = new NullVector("vector2");
+ nullVector1.setValueCount(valueCount);
+ nullVector2.setValueCount(valueCount);
+
+ List<Field> fields = Collections2.asImmutableList(nullVector1.getField(), nullVector2.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(nullVector1, nullVector2);
+ return new VectorSchemaRoot(fields, vectors, valueCount);
+ }
+
+ protected void validateNullData(VectorSchemaRoot root, int valueCount) {
+
+ NullVector vector1 = (NullVector) root.getFieldVectors().get(0);
+ NullVector vector2 = (NullVector) root.getFieldVectors().get(1);
+
+ assertEquals(valueCount, vector1.getValueCount());
+ assertEquals(valueCount, vector2.getValueCount());
+ }
+
+ public void validateUnionData(int count, VectorSchemaRoot root) {
+ FieldReader unionReader = root.getVector("union").getReader();
+ for (int i = 0; i < count; i++) {
+ unionReader.setPosition(i);
+ switch (i % 4) {
+ case 0:
+ Assert.assertEquals(i, unionReader.readInteger().intValue());
+ break;
+ case 1:
+ Assert.assertEquals(i, unionReader.readLong().longValue());
+ break;
+ case 2:
+ Assert.assertEquals(i % 3, unionReader.size());
+ break;
+ case 3:
+ NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder();
+ unionReader.reader("timestamp").read(h);
+ Assert.assertEquals(i, h.value);
+ break;
+ default:
+ assert false : "Unexpected value in switch statement: " + i;
+ }
+ }
+ }
+
+ public void writeUnionData(int count, StructVector parent) {
+ ArrowBuf varchar = allocator.buffer(3);
+ varchar.readerIndex(0);
+ varchar.setByte(0, 'a');
+ varchar.setByte(1, 'b');
+ varchar.setByte(2, 'c');
+ varchar.writerIndex(3);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("union");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("union");
+ ListWriter listWriter = rootWriter.list("union");
+ StructWriter structWriter = rootWriter.struct("union");
+ for (int i = 0; i < count; i++) {
+ switch (i % 4) {
+ case 0:
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ break;
+ case 1:
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ break;
+ case 2:
+ listWriter.setPosition(i);
+ listWriter.startList();
+ for (int j = 0; j < i % 3; j++) {
+ listWriter.varChar().writeVarChar(0, 3, varchar);
+ }
+ listWriter.endList();
+ break;
+ case 3:
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.timeStampMilli("timestamp").writeTimeStampMilli(i);
+ structWriter.end();
+ break;
+ default:
+ assert false : "Unexpected value in switch statement: " + i;
+ }
+ }
+ writer.setValueCount(count);
+ varchar.getReferenceManager().release();
+ }
+
+ protected void writeVarBinaryData(int count, StructVector parent) {
+ Assert.assertTrue(count < 100);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ ListWriter listWriter = rootWriter.list("list");
+ ArrowBuf varbin = allocator.buffer(count);
+ for (int i = 0; i < count; i++) {
+ varbin.setByte(i, i);
+ listWriter.setPosition(i);
+ listWriter.startList();
+ for (int j = 0; j < i % 3; j++) {
+ listWriter.varBinary().writeVarBinary(0, i + 1, varbin);
+ }
+ listWriter.endList();
+ }
+ writer.setValueCount(count);
+ varbin.getReferenceManager().release();
+ }
+
+ protected void validateVarBinary(int count, VectorSchemaRoot root) {
+ Assert.assertEquals(count, root.getRowCount());
+ ListVector listVector = (ListVector) root.getVector("list");
+ byte[] expectedArray = new byte[count];
+ int numVarBinaryValues = 0;
+ for (int i = 0; i < count; i++) {
+ expectedArray[i] = (byte) i;
+ List<?> objList = listVector.getObject(i);
+ if (i % 3 == 0) {
+ Assert.assertTrue(objList.isEmpty());
+ } else {
+ byte[] expected = Arrays.copyOfRange(expectedArray, 0, i + 1);
+ for (int j = 0; j < i % 3; j++) {
+ byte[] result = (byte[]) objList.get(j);
+ Assert.assertArrayEquals(result, expected);
+ numVarBinaryValues++;
+ }
+ }
+ }
+
+ // ListVector lastSet should be the index of last value + 1
+ Assert.assertEquals(listVector.getLastSet(), count - 1);
+
+ // VarBinaryVector lastSet should be the index of last value
+ VarBinaryVector binaryVector = (VarBinaryVector) listVector.getChildrenFromFields().get(0);
+ Assert.assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1);
+ }
+
+ protected void writeBatchData(ArrowWriter writer, IntVector vector, VectorSchemaRoot root) throws IOException {
+ writer.start();
+
+ vector.setNull(0);
+ vector.setSafe(1, 1);
+ vector.setSafe(2, 2);
+ vector.setNull(3);
+ vector.setSafe(4, 1);
+ vector.setValueCount(5);
+ root.setRowCount(5);
+ writer.writeBatch();
+
+ vector.setNull(0);
+ vector.setSafe(1, 1);
+ vector.setSafe(2, 2);
+ vector.setValueCount(3);
+ root.setRowCount(3);
+ writer.writeBatch();
+
+ writer.end();
+ }
+
+ protected void validateBatchData(ArrowReader reader, IntVector vector) throws IOException {
+ reader.loadNextBatch();
+
+ assertEquals(vector.getValueCount(), 5);
+ assertTrue(vector.isNull(0));
+ assertEquals(vector.get(1), 1);
+ assertEquals(vector.get(2), 2);
+ assertTrue(vector.isNull(3));
+ assertEquals(vector.get(4), 1);
+
+ reader.loadNextBatch();
+
+ assertEquals(vector.getValueCount(), 3);
+ assertTrue(vector.isNull(0));
+ assertEquals(vector.get(1), 1);
+ assertEquals(vector.get(2), 2);
+ }
+
+ protected VectorSchemaRoot writeMapData(BufferAllocator bufferAllocator) {
+ MapVector mapVector = MapVector.empty("map", bufferAllocator, false);
+ MapVector sortedMapVector = MapVector.empty("mapSorted", bufferAllocator, true);
+ mapVector.allocateNew();
+ sortedMapVector.allocateNew();
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ UnionMapWriter sortedMapWriter = sortedMapVector.getWriter();
+
+ final int count = 10;
+ for (int i = 0; i < count; i++) {
+ // Write mapVector with NULL values
+ // i == 1 is a NULL
+ if (i != 1) {
+ mapWriter.setPosition(i);
+ mapWriter.startMap();
+ // i == 3 is an empty map
+ if (i != 3) {
+ for (int j = 0; j < i + 1; j++) {
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(j);
+ // i == 5 maps to a NULL value
+ if (i != 5) {
+ mapWriter.value().integer().writeInt(j);
+ }
+ mapWriter.endEntry();
+ }
+ }
+ mapWriter.endMap();
+ }
+ // Write sortedMapVector
+ sortedMapWriter.setPosition(i);
+ sortedMapWriter.startMap();
+ for (int j = 0; j < i + 1; j++) {
+ sortedMapWriter.startEntry();
+ sortedMapWriter.key().bigInt().writeBigInt(j);
+ sortedMapWriter.value().integer().writeInt(j);
+ sortedMapWriter.endEntry();
+ }
+ sortedMapWriter.endMap();
+ }
+ mapWriter.setValueCount(COUNT);
+ sortedMapWriter.setValueCount(COUNT);
+
+ List<Field> fields = Collections2.asImmutableList(mapVector.getField(), sortedMapVector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(mapVector, sortedMapVector);
+ return new VectorSchemaRoot(fields, vectors, count);
+ }
+
+ protected void validateMapData(VectorSchemaRoot root) {
+ MapVector mapVector = (MapVector) root.getVector("map");
+ MapVector sortedMapVector = (MapVector) root.getVector("mapSorted");
+
+ final int count = 10;
+ Assert.assertEquals(count, root.getRowCount());
+
+ UnionMapReader mapReader = new UnionMapReader(mapVector);
+ UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector);
+ for (int i = 0; i < count; i++) {
+ // Read mapVector with NULL values
+ mapReader.setPosition(i);
+ if (i == 1) {
+ assertFalse(mapReader.isSet());
+ } else {
+ if (i == 3) {
+ JsonStringArrayList<?> result = (JsonStringArrayList<?>) mapReader.readObject();
+ assertTrue(result.isEmpty());
+ } else {
+ for (int j = 0; j < i + 1; j++) {
+ mapReader.next();
+ assertEquals(j, mapReader.key().readLong().longValue());
+ if (i == 5) {
+ assertFalse(mapReader.value().isSet());
+ } else {
+ assertEquals(j, mapReader.value().readInteger().intValue());
+ }
+ }
+ }
+ }
+ // Read sortedMapVector
+ sortedMapReader.setPosition(i);
+ for (int j = 0; j < i + 1; j++) {
+ sortedMapReader.next();
+ assertEquals(j, sortedMapReader.key().readLong().longValue());
+ assertEquals(j, sortedMapReader.value().readInteger().intValue());
+ }
+ }
+ }
+
+ protected VectorSchemaRoot writeListAsMapData(BufferAllocator bufferAllocator) {
+ ListVector mapEntryList = ListVector.empty("entryList", bufferAllocator);
+ FieldType mapEntryType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ StructVector mapEntryData = new StructVector("entryData", bufferAllocator, mapEntryType, null);
+ mapEntryData.addOrGet("myKey", new FieldType(false, new ArrowType.Int(64, true), null), BigIntVector.class);
+ mapEntryData.addOrGet("myValue", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ mapEntryList.initializeChildrenFromFields(Collections2.asImmutableList(mapEntryData.getField()));
+ UnionListWriter entryWriter = mapEntryList.getWriter();
+ entryWriter.allocate();
+
+ final int count = 10;
+ for (int i = 0; i < count; i++) {
+ entryWriter.setPosition(i);
+ entryWriter.startList();
+ for (int j = 0; j < i + 1; j++) {
+ entryWriter.struct().start();
+ entryWriter.struct().bigInt("myKey").writeBigInt(j);
+ entryWriter.struct().integer("myValue").writeInt(j);
+ entryWriter.struct().end();
+ }
+ entryWriter.endList();
+ }
+ entryWriter.setValueCount(COUNT);
+
+ MapVector mapVector = MapVector.empty("map", bufferAllocator, false);
+ mapEntryList.makeTransferPair(mapVector).transfer();
+
+ List<Field> fields = Collections2.asImmutableList(mapVector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(mapVector);
+ return new VectorSchemaRoot(fields, vectors, count);
+ }
+
+ protected void validateListAsMapData(VectorSchemaRoot root) {
+ MapVector sortedMapVector = (MapVector) root.getVector("map");
+
+ final int count = 10;
+ Assert.assertEquals(count, root.getRowCount());
+
+ UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector);
+ sortedMapReader.setKeyValueNames("myKey", "myValue");
+ for (int i = 0; i < count; i++) {
+ sortedMapReader.setPosition(i);
+ for (int j = 0; j < i + 1; j++) {
+ sortedMapReader.next();
+ assertEquals(j, sortedMapReader.key().readLong().longValue());
+ assertEquals(j, sortedMapReader.value().readInteger().intValue());
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java
new file mode 100644
index 000000000..d3c91fd14
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Integration test for reading/writing {@link org.apache.arrow.vector.VectorSchemaRoot} with
+ * large (more than 2GB) buffers by {@link ArrowReader} and {@link ArrowWriter}..
+ * To run this test, please make sure there is at least 8GB free memory, and 8GB
+ * free.disk space in the system.
+ */
+public class ITTestIPCWithLargeArrowBuffers {
+
+ private static final Logger logger = LoggerFactory.getLogger(ITTestIPCWithLargeArrowBuffers.class);
+
+ // 4GB buffer size
+ static final long BUFFER_SIZE = 4 * 1024 * 1024 * 1024L;
+
+ static final int DICTIONARY_VECTOR_SIZE = (int) (BUFFER_SIZE / BigIntVector.TYPE_WIDTH);
+
+ static final int ENCODED_VECTOR_SIZE = (int) (BUFFER_SIZE / IntVector.TYPE_WIDTH);
+
+ static final String FILE_NAME = "largeArrowData.data";
+
+ static final long DICTIONARY_ID = 123L;
+
+ static final ArrowType.Int ENCODED_VECTOR_TYPE = new ArrowType.Int(32, true);
+
+ static final DictionaryEncoding DICTIONARY_ENCODING =
+ new DictionaryEncoding(DICTIONARY_ID, false, ENCODED_VECTOR_TYPE);
+
+ static final FieldType ENCODED_FIELD_TYPE =
+ new FieldType(true, ENCODED_VECTOR_TYPE, DICTIONARY_ENCODING, null);
+
+ static final Field ENCODED_VECTOR_FIELD = new Field("encoded vector", ENCODED_FIELD_TYPE, null);
+
+ private void testWriteLargeArrowData(boolean streamMode) throws IOException {
+ // simulate encoding big int as int
+ try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ BigIntVector dictVector = new BigIntVector("dic vector", allocator);
+ FileOutputStream out = new FileOutputStream(FILE_NAME);
+ IntVector encodedVector = (IntVector) ENCODED_VECTOR_FIELD.createVector(allocator)) {
+
+ // prepare dictionary provider.
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ Dictionary dictionary = new Dictionary(dictVector, DICTIONARY_ENCODING);
+ provider.put(dictionary);
+
+ // populate the dictionary vector
+ dictVector.allocateNew(DICTIONARY_VECTOR_SIZE);
+ for (int i = 0; i < DICTIONARY_VECTOR_SIZE; i++) {
+ dictVector.set(i, i);
+ }
+ dictVector.setValueCount(DICTIONARY_VECTOR_SIZE);
+ assertTrue(dictVector.getDataBuffer().capacity() > Integer.MAX_VALUE);
+ logger.trace("Populating dictionary vector finished");
+
+ // populate the encoded vector
+ encodedVector.allocateNew(ENCODED_VECTOR_SIZE);
+ for (int i = 0; i < ENCODED_VECTOR_SIZE; i++) {
+ encodedVector.set(i, i % DICTIONARY_VECTOR_SIZE);
+ }
+ encodedVector.setValueCount(ENCODED_VECTOR_SIZE);
+ assertTrue(encodedVector.getDataBuffer().capacity() > Integer.MAX_VALUE);
+ logger.trace("Populating encoded vector finished");
+
+ // build vector schema root and write data.
+ try (VectorSchemaRoot root =
+ new VectorSchemaRoot(
+ Arrays.asList(ENCODED_VECTOR_FIELD), Arrays.asList(encodedVector), ENCODED_VECTOR_SIZE);
+ ArrowWriter writer = streamMode ?
+ new ArrowStreamWriter(root, provider, out) :
+ new ArrowFileWriter(root, provider, out.getChannel())) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ logger.trace("Writing data finished");
+ }
+ }
+
+ assertTrue(new File(FILE_NAME).exists());
+ }
+
+ private void testReadLargeArrowData(boolean streamMode) throws IOException {
+ try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ FileInputStream in = new FileInputStream(FILE_NAME);
+ ArrowReader reader = streamMode ?
+ new ArrowStreamReader(in, allocator) :
+ new ArrowFileReader(in.getChannel(), allocator)) {
+
+ // verify schema
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(1, readSchema.getFields().size());
+ assertEquals(ENCODED_VECTOR_FIELD, readSchema.getFields().get(0));
+ logger.trace("Verifying schema finished");
+
+ // verify vector schema root
+ assertTrue(reader.loadNextBatch());
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+
+ assertEquals(ENCODED_VECTOR_SIZE, root.getRowCount());
+ assertEquals(1, root.getFieldVectors().size());
+ assertTrue(root.getFieldVectors().get(0) instanceof IntVector);
+
+ IntVector encodedVector = (IntVector) root.getVector(0);
+ for (int i = 0; i < ENCODED_VECTOR_SIZE; i++) {
+ assertEquals(i % DICTIONARY_VECTOR_SIZE, encodedVector.get(i));
+ }
+ logger.trace("Verifying encoded vector finished");
+
+ // verify dictionary
+ Map<Long, Dictionary> dictVectors = reader.getDictionaryVectors();
+ assertEquals(1, dictVectors.size());
+ Dictionary dictionary = dictVectors.get(DICTIONARY_ID);
+ assertNotNull(dictionary);
+
+ assertTrue(dictionary.getVector() instanceof BigIntVector);
+ BigIntVector dictVector = (BigIntVector) dictionary.getVector();
+ assertEquals(DICTIONARY_VECTOR_SIZE, dictVector.getValueCount());
+ for (int i = 0; i < DICTIONARY_VECTOR_SIZE; i++) {
+ assertEquals(i, dictVector.get(i));
+ }
+ logger.trace("Verifying dictionary vector finished");
+
+ // ensure no more data available
+ assertFalse(reader.loadNextBatch());
+ } finally {
+ File dataFile = new File(FILE_NAME);
+ dataFile.delete();
+ assertFalse(dataFile.exists());
+ }
+ }
+
+ @Test
+ public void testIPC() throws IOException {
+ logger.trace("Start testing reading/writing large arrow stream data");
+ testWriteLargeArrowData(true);
+ testReadLargeArrowData(true);
+ logger.trace("Finish testing reading/writing large arrow stream data");
+
+ logger.trace("Start testing reading/writing large arrow file data");
+ testWriteLargeArrowData(false);
+ testReadLargeArrowData(false);
+ logger.trace("Finish testing reading/writing large arrow file data");
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
new file mode 100644
index 000000000..11b8d4fad
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.util.Arrays.asList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.Channels;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowMessage;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+public class MessageSerializerTest {
+
+ public static ArrowBuf buf(BufferAllocator alloc, byte[] bytes) {
+ ArrowBuf buffer = alloc.buffer(bytes.length);
+ buffer.writeBytes(bytes);
+ return buffer;
+ }
+
+ public static byte[] array(ArrowBuf buf) {
+ byte[] bytes = new byte[checkedCastToInt(buf.readableBytes())];
+ buf.readBytes(bytes);
+ return bytes;
+ }
+
+ private int intToByteRoundtrip(int v, byte[] bytes) {
+ MessageSerializer.intToBytes(v, bytes);
+ return MessageSerializer.bytesToInt(bytes);
+ }
+
+ @Test
+ public void testIntToBytes() {
+ byte[] bytes = new byte[4];
+ int[] values = new int[]{1, 15, 1 << 8, 1 << 16, Integer.MAX_VALUE};
+ for (int v : values) {
+ assertEquals(intToByteRoundtrip(v, bytes), v);
+ }
+ }
+
+ @Test
+ public void testWriteMessageBufferAligned() throws IOException {
+ ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(Channels.newChannel(outputStream));
+
+ // This is not a valid Arrow Message, only to test writing and alignment
+ ByteBuffer buffer = ByteBuffer.allocate(8).order(ByteOrder.nativeOrder());
+ buffer.putInt(1);
+ buffer.putInt(2);
+ buffer.flip();
+
+ int bytesWritten = MessageSerializer.writeMessageBuffer(out, 8, buffer);
+ assertEquals(16, bytesWritten);
+
+ buffer.rewind();
+ buffer.putInt(3);
+ buffer.flip();
+ bytesWritten = MessageSerializer.writeMessageBuffer(out, 4, buffer);
+ assertEquals(16, bytesWritten);
+
+ ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
+ ReadChannel in = new ReadChannel(Channels.newChannel(inputStream));
+ ByteBuffer result = ByteBuffer.allocate(32).order(ByteOrder.nativeOrder());
+ in.readFully(result);
+ result.rewind();
+
+ // First message continuation, size, and 2 int values
+ assertEquals(MessageSerializer.IPC_CONTINUATION_TOKEN, result.getInt());
+ // mesage length is represented in little endian
+ result.order(ByteOrder.LITTLE_ENDIAN);
+ assertEquals(8, result.getInt());
+ result.order(ByteOrder.nativeOrder());
+ assertEquals(1, result.getInt());
+ assertEquals(2, result.getInt());
+
+ // Second message continuation, size, 1 int value and 4 bytes padding
+ assertEquals(MessageSerializer.IPC_CONTINUATION_TOKEN, result.getInt());
+ // mesage length is represented in little endian
+ result.order(ByteOrder.LITTLE_ENDIAN);
+ assertEquals(8, result.getInt());
+ result.order(ByteOrder.nativeOrder());
+ assertEquals(3, result.getInt());
+ assertEquals(0, result.getInt());
+ }
+
+ @Test
+ public void testSchemaMessageSerialization() throws IOException {
+ Schema schema = testSchema();
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ long size = MessageSerializer.serialize(
+ new WriteChannel(Channels.newChannel(out)), schema);
+ assertEquals(size, out.toByteArray().length);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ Schema deserialized = MessageSerializer.deserializeSchema(
+ new ReadChannel(Channels.newChannel(in)));
+ assertEquals(schema, deserialized);
+ assertEquals(1, deserialized.getFields().size());
+ }
+
+ @Test
+ public void testSchemaDictionaryMessageSerialization() throws IOException {
+ DictionaryEncoding dictionary = new DictionaryEncoding(9L, false, new ArrowType.Int(8, true));
+ Field field = new Field("test", new FieldType(true, ArrowType.Utf8.INSTANCE, dictionary, null), null);
+ Schema schema = new Schema(Collections.singletonList(field));
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ long size = MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), schema);
+ assertEquals(size, out.toByteArray().length);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ Schema deserialized = MessageSerializer.deserializeSchema(new ReadChannel(Channels.newChannel(in)));
+ assertEquals(schema, deserialized);
+ }
+
+ @Rule
+ public ExpectedException expectedEx = ExpectedException.none();
+
+ @Test
+ public void testSerializeRecordBatchV4() throws IOException {
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // second half is "undefined"
+ byte[] values = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+ BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+ ArrowBuf validityb = buf(alloc, validity);
+ ArrowBuf valuesb = buf(alloc, values);
+
+ ArrowRecordBatch batch = new ArrowRecordBatch(
+ 16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb));
+
+ // avoid writing legacy ipc format by default
+ IpcOption option = new IpcOption(false, MetadataVersion.V4);
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), batch, option);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ ReadChannel channel = new ReadChannel(Channels.newChannel(in));
+ ArrowMessage deserialized = MessageSerializer.deserializeMessageBatch(channel, alloc);
+ assertEquals(ArrowRecordBatch.class, deserialized.getClass());
+ verifyBatch((ArrowRecordBatch) deserialized, validity, values);
+ }
+
+ @Test
+ public void testSerializeRecordBatchV5() throws Exception {
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // second half is "undefined"
+ byte[] values = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+ BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+ ArrowBuf validityb = buf(alloc, validity);
+ ArrowBuf valuesb = buf(alloc, values);
+
+ ArrowRecordBatch batch = new ArrowRecordBatch(
+ 16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb));
+
+ // avoid writing legacy ipc format by default
+ IpcOption option = new IpcOption(false, MetadataVersion.V5);
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), batch, option);
+ validityb.close();
+ valuesb.close();
+ batch.close();
+
+ {
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ ReadChannel channel = new ReadChannel(Channels.newChannel(in));
+ ArrowMessage deserialized = MessageSerializer.deserializeMessageBatch(channel, alloc);
+ assertEquals(ArrowRecordBatch.class, deserialized.getClass());
+ verifyBatch((ArrowRecordBatch) deserialized, validity, values);
+ deserialized.close();
+ }
+
+ {
+ byte[] validBytes = out.toByteArray();
+ byte[] missingBytes = Arrays.copyOfRange(validBytes, /*from=*/0, validBytes.length - 1);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(missingBytes);
+ ReadChannel channel = new ReadChannel(Channels.newChannel(in));
+
+ assertThrows(IOException.class, () -> MessageSerializer.deserializeMessageBatch(channel, alloc));
+ }
+
+ alloc.close();
+ }
+
+ public static Schema testSchema() {
+ return new Schema(asList(new Field(
+ "testField", FieldType.nullable(new ArrowType.Int(8, true)), Collections.<Field>emptyList())));
+ }
+
+ // Verifies batch contents matching test schema.
+ public static void verifyBatch(ArrowRecordBatch batch, byte[] validity, byte[] values) {
+ assertTrue(batch != null);
+ List<ArrowFieldNode> nodes = batch.getNodes();
+ assertEquals(1, nodes.size());
+ ArrowFieldNode node = nodes.get(0);
+ assertEquals(16, node.getLength());
+ assertEquals(8, node.getNullCount());
+ List<ArrowBuf> buffers = batch.getBuffers();
+ assertEquals(2, buffers.size());
+ assertArrayEquals(validity, MessageSerializerTest.array(buffers.get(0)));
+ assertArrayEquals(values, MessageSerializerTest.array(buffers.get(1)));
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java
new file mode 100644
index 000000000..4fb582278
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.nio.channels.Channels.newChannel;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestArrowFile extends BaseFileTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(TestArrowFile.class);
+
+ @Test
+ public void testWrite() throws IOException {
+ File file = new File("target/mytest_write.arrow");
+ int count = COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeData(count, parent);
+ write(parent.getChild("root"), file, new ByteArrayOutputStream());
+ }
+ }
+
+ @Test
+ public void testWriteComplex() throws IOException {
+ File file = new File("target/mytest_write_complex.arrow");
+ int count = COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeComplexData(count, parent);
+ FieldVector root = parent.getChild("root");
+ validateComplexContent(count, new VectorSchemaRoot(root));
+ write(root, file, new ByteArrayOutputStream());
+ }
+ }
+
+ /**
+ * Writes the contents of parents to file. If outStream is non-null, also writes it
+ * to outStream in the streaming serialized format.
+ */
+ private void write(FieldVector parent, File file, OutputStream outStream) throws IOException {
+ VectorSchemaRoot root = new VectorSchemaRoot(parent);
+
+ try (FileOutputStream fileOutputStream = new FileOutputStream(file);
+ ArrowFileWriter arrowWriter = new ArrowFileWriter(root, null, fileOutputStream.getChannel());) {
+ LOGGER.debug("writing schema: " + root.getSchema());
+ arrowWriter.start();
+ arrowWriter.writeBatch();
+ arrowWriter.end();
+ }
+
+ // Also try serializing to the stream writer.
+ if (outStream != null) {
+ try (ArrowStreamWriter arrowWriter = new ArrowStreamWriter(root, null, outStream)) {
+ arrowWriter.start();
+ arrowWriter.writeBatch();
+ arrowWriter.end();
+ }
+ }
+ }
+
+ @Test
+ public void testFileStreamHasEos() throws IOException {
+
+ try (VarCharVector vector1 = newVarCharVector("varchar1", allocator)) {
+ vector1.allocateNewSafe();
+ vector1.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector1.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1.setValueCount(6);
+
+ List<Field> fields = Arrays.asList(vector1.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(vector1);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector1.getValueCount());
+
+ // write data
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out));
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ byte[] bytes = out.toByteArray();
+ byte[] bytesWithoutMagic = new byte[bytes.length - 8];
+ System.arraycopy(bytes, 8, bytesWithoutMagic, 0, bytesWithoutMagic.length);
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayInputStream(bytesWithoutMagic), allocator)) {
+ assertTrue(reader.loadNextBatch());
+ // here will throw exception if read footer instead of eos.
+ assertFalse(reader.loadNextBatch());
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java
new file mode 100644
index 000000000..38c65bdde
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.flatbuf.Footer;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowFooter;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+public class TestArrowFooter {
+
+ @Test
+ public void test() {
+ Schema schema = new Schema(asList(
+ new Field("a", FieldType.nullable(new ArrowType.Int(8, true)), Collections.<Field>emptyList())
+ ));
+ ArrowFooter footer =
+ new ArrowFooter(schema, Collections.<ArrowBlock>emptyList(), Collections.<ArrowBlock>emptyList());
+ ArrowFooter newFooter = roundTrip(footer);
+ assertEquals(footer, newFooter);
+
+ List<ArrowBlock> ids = new ArrayList<>();
+ ids.add(new ArrowBlock(0, 1, 2));
+ ids.add(new ArrowBlock(4, 5, 6));
+ footer = new ArrowFooter(schema, ids, ids);
+ assertEquals(footer, roundTrip(footer));
+ }
+
+
+ private ArrowFooter roundTrip(ArrowFooter footer) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int i = footer.writeTo(builder);
+ builder.finish(i);
+ ByteBuffer dataBuffer = builder.dataBuffer();
+ ArrowFooter newFooter = new ArrowFooter(Footer.getRootAsFooter(dataBuffer));
+ return newFooter;
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
new file mode 100644
index 000000000..1167819de
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
@@ -0,0 +1,882 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.nio.channels.Channels.newChannel;
+import static java.util.Arrays.asList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.Channels;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flatbuf.FieldNode;
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TestUtils;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestArrowReaderWriter {
+
+ private BufferAllocator allocator;
+
+ private VarCharVector dictionaryVector1;
+ private VarCharVector dictionaryVector2;
+ private VarCharVector dictionaryVector3;
+ private StructVector dictionaryVector4;
+
+ private Dictionary dictionary1;
+ private Dictionary dictionary2;
+ private Dictionary dictionary3;
+ private Dictionary dictionary4;
+
+ private Schema schema;
+ private Schema encodedSchema;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+
+ dictionaryVector1 = newVarCharVector("D1", allocator);
+ setVector(dictionaryVector1,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8));
+
+ dictionaryVector2 = newVarCharVector("D2", allocator);
+ setVector(dictionaryVector2,
+ "aa".getBytes(StandardCharsets.UTF_8),
+ "bb".getBytes(StandardCharsets.UTF_8),
+ "cc".getBytes(StandardCharsets.UTF_8));
+
+ dictionaryVector3 = newVarCharVector("D3", allocator);
+ setVector(dictionaryVector3,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8),
+ "aa".getBytes(StandardCharsets.UTF_8),
+ "bb".getBytes(StandardCharsets.UTF_8),
+ "cc".getBytes(StandardCharsets.UTF_8));
+
+ dictionaryVector4 = newVector(StructVector.class, "D4", MinorType.STRUCT, allocator);
+ final Map<String, List<Integer>> dictionaryValues4 = new HashMap<>();
+ dictionaryValues4.put("a", Arrays.asList(1, 2, 3));
+ dictionaryValues4.put("b", Arrays.asList(4, 5, 6));
+ setVector(dictionaryVector4, dictionaryValues4);
+
+ dictionary1 = new Dictionary(dictionaryVector1,
+ new DictionaryEncoding(/*id=*/1L, /*ordered=*/false, /*indexType=*/null));
+ dictionary2 = new Dictionary(dictionaryVector2,
+ new DictionaryEncoding(/*id=*/2L, /*ordered=*/false, /*indexType=*/null));
+ dictionary3 = new Dictionary(dictionaryVector3,
+ new DictionaryEncoding(/*id=*/1L, /*ordered=*/false, /*indexType=*/null));
+ dictionary4 = new Dictionary(dictionaryVector4,
+ new DictionaryEncoding(/*id=*/3L, /*ordered=*/false, /*indexType=*/null));
+ }
+
+ @After
+ public void terminate() throws Exception {
+ dictionaryVector1.close();
+ dictionaryVector2.close();
+ dictionaryVector3.close();
+ dictionaryVector4.close();
+ allocator.close();
+ }
+
+ ArrowBuf buf(byte[] bytes) {
+ ArrowBuf buffer = allocator.buffer(bytes.length);
+ buffer.writeBytes(bytes);
+ return buffer;
+ }
+
+ byte[] array(ArrowBuf buf) {
+ byte[] bytes = new byte[checkedCastToInt(buf.readableBytes())];
+ buf.readBytes(bytes);
+ return bytes;
+ }
+
+ @Test
+ public void test() throws IOException {
+ Schema schema = new Schema(asList(new Field("testField", FieldType.nullable(new ArrowType.Int(8, true)),
+ Collections.<Field>emptyList())));
+ ArrowType type = schema.getFields().get(0).getType();
+ FieldVector vector = TestUtils.newVector(FieldVector.class, "testField", type, allocator);
+ vector.initializeChildrenFromFields(schema.getFields().get(0).getChildren());
+
+ byte[] validity = new byte[] {(byte) 255, 0};
+ // second half is "undefined"
+ byte[] values = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), asList(vector), 16);
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out))) {
+ ArrowBuf validityb = buf(validity);
+ ArrowBuf valuesb = buf(values);
+ ArrowRecordBatch batch = new ArrowRecordBatch(16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb));
+ VectorLoader loader = new VectorLoader(root);
+ loader.load(batch);
+ writer.writeBatch();
+
+ validityb.close();
+ valuesb.close();
+ batch.close();
+ }
+
+ byte[] byteArray = out.toByteArray();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(byteArray));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(schema, readSchema);
+ // TODO: dictionaries
+ List<ArrowBlock> recordBatches = reader.getRecordBlocks();
+ assertEquals(1, recordBatches.size());
+ reader.loadNextBatch();
+ VectorUnloader unloader = new VectorUnloader(reader.getVectorSchemaRoot());
+ ArrowRecordBatch recordBatch = unloader.getRecordBatch();
+ List<ArrowFieldNode> nodes = recordBatch.getNodes();
+ assertEquals(1, nodes.size());
+ ArrowFieldNode node = nodes.get(0);
+ assertEquals(16, node.getLength());
+ assertEquals(8, node.getNullCount());
+ List<ArrowBuf> buffers = recordBatch.getBuffers();
+ assertEquals(2, buffers.size());
+ assertArrayEquals(validity, array(buffers.get(0)));
+ assertArrayEquals(values, array(buffers.get(1)));
+
+ // Read just the header. This demonstrates being able to read without need to
+ // deserialize the buffer.
+ ByteBuffer headerBuffer = ByteBuffer.allocate(recordBatches.get(0).getMetadataLength());
+ headerBuffer.put(byteArray, (int) recordBatches.get(0).getOffset(), headerBuffer.capacity());
+ // new format prefix_size ==8
+ headerBuffer.position(8);
+ Message messageFB = Message.getRootAsMessage(headerBuffer);
+ RecordBatch recordBatchFB = (RecordBatch) messageFB.header(new RecordBatch());
+ assertEquals(2, recordBatchFB.buffersLength());
+ assertEquals(1, recordBatchFB.nodesLength());
+ FieldNode nodeFB = recordBatchFB.nodes(0);
+ assertEquals(16, nodeFB.length());
+ assertEquals(8, nodeFB.nullCount());
+
+ recordBatch.close();
+ }
+ }
+
+ @Test
+ public void testWriteReadNullVector() throws IOException {
+
+ int valueCount = 3;
+
+ NullVector nullVector = new NullVector("vector");
+ nullVector.setValueCount(valueCount);
+
+ Schema schema = new Schema(asList(nullVector.getField()));
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), asList(nullVector), valueCount);
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out))) {
+ ArrowRecordBatch batch = new ArrowRecordBatch(valueCount,
+ asList(new ArrowFieldNode(valueCount, 0)),
+ Collections.emptyList());
+ VectorLoader loader = new VectorLoader(root);
+ loader.load(batch);
+ writer.writeBatch();
+ }
+
+ byte[] byteArray = out.toByteArray();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(byteArray));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(schema, readSchema);
+ List<ArrowBlock> recordBatches = reader.getRecordBlocks();
+ assertEquals(1, recordBatches.size());
+
+ assertTrue(reader.loadNextBatch());
+ assertEquals(1, reader.getVectorSchemaRoot().getFieldVectors().size());
+
+ NullVector readNullVector = (NullVector) reader.getVectorSchemaRoot().getFieldVectors().get(0);
+ assertEquals(valueCount, readNullVector.getValueCount());
+ }
+ }
+
+ @Test
+ public void testWriteReadWithDictionaries() throws IOException {
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+
+ VarCharVector vector1 = newVarCharVector("varchar1", allocator);
+ vector1.allocateNewSafe();
+ vector1.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector1.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1.setValueCount(6);
+ FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector1, dictionary1);
+ vector1.close();
+
+ VarCharVector vector2 = newVarCharVector("varchar2", allocator);
+ vector2.allocateNewSafe();
+ vector2.set(0, "bar".getBytes(StandardCharsets.UTF_8));
+ vector2.set(1, "baz".getBytes(StandardCharsets.UTF_8));
+ vector2.set(2, "foo".getBytes(StandardCharsets.UTF_8));
+ vector2.set(3, "foo".getBytes(StandardCharsets.UTF_8));
+ vector2.set(4, "foo".getBytes(StandardCharsets.UTF_8));
+ vector2.set(5, "bar".getBytes(StandardCharsets.UTF_8));
+ vector2.setValueCount(6);
+ FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary1);
+ vector2.close();
+
+ List<Field> fields = Arrays.asList(encodedVector1.getField(), encodedVector2.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVector1, encodedVector2);
+ try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, encodedVector1.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, provider, newChannel(out));) {
+
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(root.getSchema(), readSchema);
+ assertEquals(1, reader.getDictionaryBlocks().size());
+ assertEquals(1, reader.getRecordBlocks().size());
+
+ reader.loadNextBatch();
+ assertEquals(2, reader.getVectorSchemaRoot().getFieldVectors().size());
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadWithStructDictionaries() throws IOException {
+ DictionaryProvider.MapDictionaryProvider provider =
+ new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary4);
+
+ try (final StructVector vector =
+ newVector(StructVector.class, "D4", MinorType.STRUCT, allocator)) {
+ final Map<String, List<Integer>> values = new HashMap<>();
+ // Index: 0, 2, 1, 2, 1, 0, 0
+ values.put("a", Arrays.asList(1, 3, 2, 3, 2, 1, 1));
+ values.put("b", Arrays.asList(4, 6, 5, 6, 5, 4, 4));
+ setVector(vector, values);
+ FieldVector encodedVector = (FieldVector) DictionaryEncoder.encode(vector, dictionary4);
+
+ List<Field> fields = Arrays.asList(encodedVector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVector);
+ try (
+ VectorSchemaRoot root =
+ new VectorSchemaRoot(fields, vectors, encodedVector.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, provider, newChannel(out));) {
+
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ try (
+ SeekableReadChannel channel = new SeekableReadChannel(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ final VectorSchemaRoot readRoot = reader.getVectorSchemaRoot();
+ final Schema readSchema = readRoot.getSchema();
+ assertEquals(root.getSchema(), readSchema);
+ assertEquals(1, reader.getDictionaryBlocks().size());
+ assertEquals(1, reader.getRecordBlocks().size());
+
+ reader.loadNextBatch();
+ assertEquals(1, readRoot.getFieldVectors().size());
+ assertEquals(1, reader.getDictionaryVectors().size());
+
+ // Read the encoded vector and check it
+ final FieldVector readEncoded = readRoot.getVector(0);
+ assertEquals(encodedVector.getValueCount(), readEncoded.getValueCount());
+ assertTrue(new RangeEqualsVisitor(encodedVector, readEncoded)
+ .rangeEquals(new Range(0, 0, encodedVector.getValueCount())));
+
+ // Read the dictionary
+ final Map<Long, Dictionary> readDictionaryMap = reader.getDictionaryVectors();
+ final Dictionary readDictionary =
+ readDictionaryMap.get(readEncoded.getField().getDictionary().getId());
+ assertNotNull(readDictionary);
+
+ // Assert the dictionary vector is correct
+ final FieldVector readDictionaryVector = readDictionary.getVector();
+ assertEquals(dictionaryVector4.getValueCount(), readDictionaryVector.getValueCount());
+ final BiFunction<ValueVector, ValueVector, Boolean> typeComparatorIgnoreName =
+ (v1, v2) -> new TypeEqualsVisitor(v1, false, true).equals(v2);
+ assertTrue("Dictionary vectors are not equal",
+ new RangeEqualsVisitor(dictionaryVector4, readDictionaryVector,
+ typeComparatorIgnoreName)
+ .rangeEquals(new Range(0, 0, dictionaryVector4.getValueCount())));
+
+ // Assert the decoded vector is correct
+ try (final ValueVector readVector =
+ DictionaryEncoder.decode(readEncoded, readDictionary)) {
+ assertEquals(vector.getValueCount(), readVector.getValueCount());
+ assertTrue("Decoded vectors are not equal",
+ new RangeEqualsVisitor(vector, readVector, typeComparatorIgnoreName)
+ .rangeEquals(new Range(0, 0, vector.getValueCount())));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEmptyStreamInFileIPC() throws IOException {
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+
+ VarCharVector vector = newVarCharVector("varchar", allocator);
+ vector.allocateNewSafe();
+ vector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector.setValueCount(6);
+
+ FieldVector encodedVector1A = (FieldVector) DictionaryEncoder.encode(vector, dictionary1);
+ vector.close();
+
+ List<Field> fields = Arrays.asList(encodedVector1A.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVector1A);
+
+ try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, encodedVector1A.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, provider, newChannel(out))) {
+
+ writer.start();
+ writer.end();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(root.getSchema(), readSchema);
+ assertEquals(1, reader.getDictionaryVectors().size());
+ assertEquals(0, reader.getDictionaryBlocks().size());
+ assertEquals(0, reader.getRecordBlocks().size());
+ }
+ }
+
+ }
+
+ @Test
+ public void testEmptyStreamInStreamingIPC() throws IOException {
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+
+ VarCharVector vector = newVarCharVector("varchar", allocator);
+ vector.allocateNewSafe();
+ vector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector.setValueCount(6);
+
+ FieldVector encodedVector = (FieldVector) DictionaryEncoder.encode(vector, dictionary1);
+ vector.close();
+
+ List<Field> fields = Arrays.asList(encodedVector.getField());
+ try (VectorSchemaRoot root =
+ new VectorSchemaRoot(fields, Arrays.asList(encodedVector), encodedVector.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, newChannel(out))) {
+
+ writer.start();
+ writer.end();
+
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(root.getSchema(), readSchema);
+ assertEquals(1, reader.getDictionaryVectors().size());
+ assertFalse(reader.loadNextBatch());
+ }
+ }
+
+ }
+
+ @Test
+ public void testDictionaryReplacement() throws Exception {
+ VarCharVector vector1 = newVarCharVector("varchar1", allocator);
+ setVector(vector1,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8));
+
+ FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector1, dictionary1);
+
+ VarCharVector vector2 = newVarCharVector("varchar2", allocator);
+ setVector(vector2,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "foo".getBytes(StandardCharsets.UTF_8));
+
+ FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary1);
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+ List<Field> schemaFields = new ArrayList<>();
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVector1.getField(), provider, new HashSet<>()));
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVector2.getField(), provider, new HashSet<>()));
+ Schema schema = new Schema(schemaFields);
+
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(newChannel(outStream));
+
+ // write schema
+ MessageSerializer.serialize(out, schema);
+
+ List<AutoCloseable> closeableList = new ArrayList<>();
+
+ // write non-delta dictionary with id=1
+ serializeDictionaryBatch(out, dictionary3, false, closeableList);
+
+ // write non-delta dictionary with id=1
+ serializeDictionaryBatch(out, dictionary1, false, closeableList);
+
+ // write recordBatch2
+ serializeRecordBatch(out, Arrays.asList(encodedVector1, encodedVector2), closeableList);
+
+ // write eos
+ out.writeIntLittleEndian(0);
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(
+ new ByteArrayReadableSeekableByteChannel(outStream.toByteArray()), allocator)) {
+ assertEquals(1, reader.getDictionaryVectors().size());
+ assertTrue(reader.loadNextBatch());
+ FieldVector dictionaryVector = reader.getDictionaryVectors().get(1L).getVector();
+ // make sure the delta dictionary is concatenated.
+ assertTrue(VectorEqualsVisitor.vectorEquals(dictionaryVector, dictionaryVector1, null));
+ assertFalse(reader.loadNextBatch());
+ }
+
+ vector1.close();
+ vector2.close();
+ AutoCloseables.close(closeableList);
+ }
+
+ @Test
+ public void testDeltaDictionary() throws Exception {
+ VarCharVector vector1 = newVarCharVector("varchar1", allocator);
+ setVector(vector1,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8));
+
+ FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector1, dictionary1);
+
+ VarCharVector vector2 = newVarCharVector("varchar2", allocator);
+ setVector(vector2,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "aa".getBytes(StandardCharsets.UTF_8),
+ "bb".getBytes(StandardCharsets.UTF_8),
+ "cc".getBytes(StandardCharsets.UTF_8));
+
+ FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary3);
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+ provider.put(dictionary3);
+ List<Field> schemaFields = new ArrayList<>();
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVector1.getField(), provider, new HashSet<>()));
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVector2.getField(), provider, new HashSet<>()));
+ Schema schema = new Schema(schemaFields);
+
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(newChannel(outStream));
+
+ // write schema
+ MessageSerializer.serialize(out, schema);
+
+ List<AutoCloseable> closeableList = new ArrayList<>();
+
+ // write non-delta dictionary with id=1
+ serializeDictionaryBatch(out, dictionary1, false, closeableList);
+
+ // write delta dictionary with id=1
+ Dictionary deltaDictionary =
+ new Dictionary(dictionaryVector2, new DictionaryEncoding(1L, false, null));
+ serializeDictionaryBatch(out, deltaDictionary, true, closeableList);
+ deltaDictionary.getVector().close();
+
+ // write recordBatch2
+ serializeRecordBatch(out, Arrays.asList(encodedVector1, encodedVector2), closeableList);
+
+ // write eos
+ out.writeIntLittleEndian(0);
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(
+ new ByteArrayReadableSeekableByteChannel(outStream.toByteArray()), allocator)) {
+ assertEquals(1, reader.getDictionaryVectors().size());
+ assertTrue(reader.loadNextBatch());
+ FieldVector dictionaryVector = reader.getDictionaryVectors().get(1L).getVector();
+ // make sure the delta dictionary is concatenated.
+ assertTrue(VectorEqualsVisitor.vectorEquals(dictionaryVector, dictionaryVector3, null));
+ assertFalse(reader.loadNextBatch());
+ }
+
+ vector1.close();
+ vector2.close();
+ AutoCloseables.close(closeableList);
+
+ }
+
+ private void serializeDictionaryBatch(
+ WriteChannel out,
+ Dictionary dictionary,
+ boolean isDelta,
+ List<AutoCloseable> closeables) throws IOException {
+
+ FieldVector dictVector = dictionary.getVector();
+ VectorSchemaRoot root = new VectorSchemaRoot(
+ Collections.singletonList(dictVector.getField()),
+ Collections.singletonList(dictVector),
+ dictVector.getValueCount());
+ ArrowDictionaryBatch batch =
+ new ArrowDictionaryBatch(dictionary.getEncoding().getId(), new VectorUnloader(root).getRecordBatch(), isDelta);
+ MessageSerializer.serialize(out, batch);
+ closeables.add(batch);
+ closeables.add(root);
+ }
+
+ private void serializeRecordBatch(
+ WriteChannel out,
+ List<FieldVector> vectors,
+ List<AutoCloseable> closeables) throws IOException {
+
+ List<Field> fields = vectors.stream().map(v -> v.getField()).collect(Collectors.toList());
+ VectorSchemaRoot root = new VectorSchemaRoot(
+ fields,
+ vectors,
+ vectors.get(0).getValueCount());
+ VectorUnloader unloader = new VectorUnloader(root);
+ ArrowRecordBatch batch = unloader.getRecordBatch();
+ MessageSerializer.serialize(out, batch);
+ closeables.add(batch);
+ closeables.add(root);
+ }
+
+ @Test
+ public void testReadInterleavedData() throws IOException {
+ List<ArrowRecordBatch> batches = createRecordBatches();
+
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(newChannel(outStream));
+
+ // write schema
+ MessageSerializer.serialize(out, schema);
+
+ // write dictionary1
+ FieldVector dictVector1 = dictionary1.getVector();
+ VectorSchemaRoot dictRoot1 = new VectorSchemaRoot(
+ Collections.singletonList(dictVector1.getField()),
+ Collections.singletonList(dictVector1),
+ dictVector1.getValueCount());
+ ArrowDictionaryBatch dictionaryBatch1 =
+ new ArrowDictionaryBatch(1, new VectorUnloader(dictRoot1).getRecordBatch());
+ MessageSerializer.serialize(out, dictionaryBatch1);
+ dictionaryBatch1.close();
+ dictRoot1.close();
+
+ // write recordBatch1
+ MessageSerializer.serialize(out, batches.get(0));
+
+ // write dictionary2
+ FieldVector dictVector2 = dictionary2.getVector();
+ VectorSchemaRoot dictRoot2 = new VectorSchemaRoot(
+ Collections.singletonList(dictVector2.getField()),
+ Collections.singletonList(dictVector2),
+ dictVector2.getValueCount());
+ ArrowDictionaryBatch dictionaryBatch2 =
+ new ArrowDictionaryBatch(2, new VectorUnloader(dictRoot2).getRecordBatch());
+ MessageSerializer.serialize(out, dictionaryBatch2);
+ dictionaryBatch2.close();
+ dictRoot2.close();
+
+ // write recordBatch1
+ MessageSerializer.serialize(out, batches.get(1));
+
+ // write eos
+ out.writeIntLittleEndian(0);
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(
+ new ByteArrayReadableSeekableByteChannel(outStream.toByteArray()), allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(encodedSchema, readSchema);
+ assertEquals(2, reader.getDictionaryVectors().size());
+ assertTrue(reader.loadNextBatch());
+ assertTrue(reader.loadNextBatch());
+ assertFalse(reader.loadNextBatch());
+ }
+
+ batches.forEach(batch -> batch.close());
+ }
+
+ private List<ArrowRecordBatch> createRecordBatches() {
+ List<ArrowRecordBatch> batches = new ArrayList<>();
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+ provider.put(dictionary2);
+
+ VarCharVector vectorA1 = newVarCharVector("varcharA1", allocator);
+ vectorA1.allocateNewSafe();
+ vectorA1.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vectorA1.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vectorA1.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vectorA1.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vectorA1.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vectorA1.setValueCount(6);
+
+ VarCharVector vectorA2 = newVarCharVector("varcharA2", allocator);
+ vectorA2.setValueCount(6);
+ FieldVector encodedVectorA1 = (FieldVector) DictionaryEncoder.encode(vectorA1, dictionary1);
+ vectorA1.close();
+ FieldVector encodedVectorA2 = (FieldVector) DictionaryEncoder.encode(vectorA1, dictionary2);
+ vectorA2.close();
+
+ List<Field> fields = Arrays.asList(encodedVectorA1.getField(), encodedVectorA2.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVectorA1, encodedVectorA2);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, encodedVectorA1.getValueCount());
+ VectorUnloader unloader = new VectorUnloader(root);
+ batches.add(unloader.getRecordBatch());
+ root.close();
+
+ VarCharVector vectorB1 = newVarCharVector("varcharB1", allocator);
+ vectorB1.setValueCount(6);
+
+ VarCharVector vectorB2 = newVarCharVector("varcharB2", allocator);
+ vectorB2.allocateNew();
+ vectorB2.setValueCount(6);
+ vectorB2.set(0, "aa".getBytes(StandardCharsets.UTF_8));
+ vectorB2.set(1, "aa".getBytes(StandardCharsets.UTF_8));
+ vectorB2.set(3, "bb".getBytes(StandardCharsets.UTF_8));
+ vectorB2.set(4, "bb".getBytes(StandardCharsets.UTF_8));
+ vectorB2.set(5, "cc".getBytes(StandardCharsets.UTF_8));
+ vectorB2.setValueCount(6);
+ FieldVector encodedVectorB1 = (FieldVector) DictionaryEncoder.encode(vectorB1, dictionary1);
+ vectorB1.close();
+ FieldVector encodedVectorB2 = (FieldVector) DictionaryEncoder.encode(vectorB2, dictionary2);
+ vectorB2.close();
+
+ List<Field> fieldsB = Arrays.asList(encodedVectorB1.getField(), encodedVectorB2.getField());
+ List<FieldVector> vectorsB = Collections2.asImmutableList(encodedVectorB1, encodedVectorB2);
+ VectorSchemaRoot rootB = new VectorSchemaRoot(fieldsB, vectorsB, 6);
+ VectorUnloader unloaderB = new VectorUnloader(rootB);
+ batches.add(unloaderB.getRecordBatch());
+ rootB.close();
+
+ List<Field> schemaFields = new ArrayList<>();
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVectorA1.getField(), provider, new HashSet<>()));
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVectorA2.getField(), provider, new HashSet<>()));
+ schema = new Schema(schemaFields);
+
+ encodedSchema = new Schema(Arrays.asList(encodedVectorA1.getField(), encodedVectorA2.getField()));
+
+ return batches;
+ }
+
+ @Test
+ public void testLegacyIpcBackwardsCompatibility() throws Exception {
+ Schema schema = new Schema(asList(Field.nullable("field", new ArrowType.Int(32, true))));
+ IntVector vector = new IntVector("vector", allocator);
+ final int valueCount = 2;
+ vector.setValueCount(valueCount);
+ vector.setSafe(0, 1);
+ vector.setSafe(1, 2);
+ ArrowRecordBatch batch = new ArrowRecordBatch(valueCount, asList(new ArrowFieldNode(valueCount, 0)),
+ asList(vector.getValidityBuffer(), vector.getDataBuffer()));
+
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(newChannel(outStream));
+
+ // write legacy ipc format
+ IpcOption option = new IpcOption(true, MetadataVersion.DEFAULT);
+ MessageSerializer.serialize(out, schema, option);
+ MessageSerializer.serialize(out, batch);
+
+ ReadChannel in = new ReadChannel(newChannel(new ByteArrayInputStream(outStream.toByteArray())));
+ Schema readSchema = MessageSerializer.deserializeSchema(in);
+ assertEquals(schema, readSchema);
+ ArrowRecordBatch readBatch = MessageSerializer.deserializeRecordBatch(in, allocator);
+ assertEquals(batch.getLength(), readBatch.getLength());
+ assertEquals(batch.computeBodyLength(), readBatch.computeBodyLength());
+ readBatch.close();
+
+ // write ipc format with continuation
+ option = IpcOption.DEFAULT;
+ MessageSerializer.serialize(out, schema, option);
+ MessageSerializer.serialize(out, batch);
+
+ ReadChannel in2 = new ReadChannel(newChannel(new ByteArrayInputStream(outStream.toByteArray())));
+ Schema readSchema2 = MessageSerializer.deserializeSchema(in2);
+ assertEquals(schema, readSchema2);
+ ArrowRecordBatch readBatch2 = MessageSerializer.deserializeRecordBatch(in2, allocator);
+ assertEquals(batch.getLength(), readBatch2.getLength());
+ assertEquals(batch.computeBodyLength(), readBatch2.computeBodyLength());
+ readBatch2.close();
+
+ batch.close();
+ vector.close();
+ }
+
+ @Test
+ public void testChannelReadFully() throws IOException {
+ final ByteBuffer buf = ByteBuffer.allocate(4).order(ByteOrder.nativeOrder());
+ buf.putInt(200);
+ buf.rewind();
+
+ try (ReadChannel channel = new ReadChannel(Channels.newChannel(new ByteArrayInputStream(buf.array())));
+ ArrowBuf arrBuf = allocator.buffer(8)) {
+ arrBuf.setInt(0, 100);
+ arrBuf.writerIndex(4);
+ assertEquals(4, arrBuf.writerIndex());
+
+ long n = channel.readFully(arrBuf, 4);
+ assertEquals(4, n);
+ assertEquals(8, arrBuf.writerIndex());
+
+ assertEquals(100, arrBuf.getInt(0));
+ assertEquals(200, arrBuf.getInt(4));
+ }
+ }
+
+ @Test
+ public void testChannelReadFullyEos() throws IOException {
+ final ByteBuffer buf = ByteBuffer.allocate(4).order(ByteOrder.nativeOrder());
+ buf.putInt(10);
+ buf.rewind();
+
+ try (ReadChannel channel = new ReadChannel(Channels.newChannel(new ByteArrayInputStream(buf.array())));
+ ArrowBuf arrBuf = allocator.buffer(8)) {
+ int n = channel.readFully(arrBuf.nioBuffer(0, 8));
+ assertEquals(4, n);
+
+ // the input has only 4 bytes, so the number of bytes read should be 4
+ assertEquals(4, channel.bytesRead());
+
+ // the first 4 bytes have been read successfully.
+ assertEquals(10, arrBuf.getInt(0));
+ }
+ }
+
+ @Test
+ public void testCustomMetaData() throws IOException {
+
+ VarCharVector vector = newVarCharVector("varchar1", allocator);
+
+ List<Field> fields = Arrays.asList(vector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(vector);
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+ metadata.put("key2", "value2");
+ try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out), metadata);) {
+
+ writer.start();
+ writer.end();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ reader.getVectorSchemaRoot();
+
+ Map<String, String> readMeta = reader.getMetaData();
+ assertEquals(2, readMeta.size());
+ assertEquals("value1", readMeta.get("key1"));
+ assertEquals("value2", readMeta.get("key2"));
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java
new file mode 100644
index 000000000..9348cd3a6
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+import java.util.Collections;
+
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestArrowStream extends BaseFileTest {
+ @Test
+ public void testEmptyStream() throws IOException {
+ Schema schema = MessageSerializerTest.testSchema();
+ VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
+
+ // Write the stream.
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out);
+ writer.close();
+ Assert.assertTrue(out.size() > 0);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) {
+ assertEquals(schema, reader.getVectorSchemaRoot().getSchema());
+ // Empty should return false
+ Assert.assertFalse(reader.loadNextBatch());
+ assertEquals(0, reader.getVectorSchemaRoot().getRowCount());
+ Assert.assertFalse(reader.loadNextBatch());
+ assertEquals(0, reader.getVectorSchemaRoot().getRowCount());
+ }
+ }
+
+ @Test
+ public void testStreamZeroLengthBatch() throws IOException {
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+ try (IntVector vector = new IntVector("foo", allocator);) {
+ Schema schema = new Schema(Collections.singletonList(vector.getField()));
+ try (VectorSchemaRoot root =
+ new VectorSchemaRoot(schema, Collections.singletonList(vector), vector.getValueCount());
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(os));) {
+ vector.setValueCount(0);
+ root.setRowCount(0);
+ writer.writeBatch();
+ writer.end();
+ }
+ }
+
+ ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray());
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator);) {
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+ IntVector vector = (IntVector) root.getFieldVectors().get(0);
+ reader.loadNextBatch();
+ assertEquals(vector.getValueCount(), 0);
+ assertEquals(root.getRowCount(), 0);
+ }
+ }
+
+ @Test
+ public void testReadWrite() throws IOException {
+ Schema schema = MessageSerializerTest.testSchema();
+ try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ int numBatches = 1;
+
+ root.getFieldVectors().get(0).allocateNew();
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ for (int i = 0; i < 16; i++) {
+ vector.set(i, i < 8 ? 1 : 0, (byte) (i + 1));
+ }
+ vector.setValueCount(16);
+ root.setRowCount(16);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ long bytesWritten = 0;
+ try (ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out)) {
+ writer.start();
+ for (int i = 0; i < numBatches; i++) {
+ writer.writeBatch();
+ }
+ writer.end();
+ bytesWritten = writer.bytesWritten();
+ }
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(schema, readSchema);
+ for (int i = 0; i < numBatches; i++) {
+ assertTrue(reader.loadNextBatch());
+ }
+ // TODO figure out why reader isn't getting padding bytes
+ assertEquals(bytesWritten, reader.bytesRead() + 8);
+ assertFalse(reader.loadNextBatch());
+ assertEquals(0, reader.getVectorSchemaRoot().getRowCount());
+ }
+ }
+ }
+
+ @Test
+ public void testReadWriteMultipleBatches() throws IOException {
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+ try (IntVector vector = new IntVector("foo", allocator);) {
+ Schema schema = new Schema(Collections.singletonList(vector.getField()));
+ try (VectorSchemaRoot root =
+ new VectorSchemaRoot(schema, Collections.singletonList(vector), vector.getValueCount());
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(os));) {
+ writeBatchData(writer, vector, root);
+ }
+ }
+
+ ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray());
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator);) {
+ IntVector vector = (IntVector) reader.getVectorSchemaRoot().getFieldVectors().get(0);
+ validateBatchData(reader, vector);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java
new file mode 100644
index 000000000..422a63f57
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.channels.Pipe;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.channels.WritableByteChannel;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.ipc.MessageSerializerTest;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestArrowStreamPipe {
+ Schema schema = MessageSerializerTest.testSchema();
+ BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+
+ private final class WriterThread extends Thread {
+
+ private final int numBatches;
+ private final ArrowStreamWriter writer;
+ private final VectorSchemaRoot root;
+
+ public WriterThread(int numBatches, WritableByteChannel sinkChannel)
+ throws IOException {
+ this.numBatches = numBatches;
+ BufferAllocator allocator = alloc.newChildAllocator("writer thread", 0, Integer.MAX_VALUE);
+ root = VectorSchemaRoot.create(schema, allocator);
+ writer = new ArrowStreamWriter(root, null, sinkChannel);
+ }
+
+ @Override
+ public void run() {
+ try {
+ writer.start();
+ for (int j = 0; j < numBatches; j++) {
+ root.getFieldVectors().get(0).allocateNew();
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ // Send a changing batch id first
+ vector.set(0, j);
+ for (int i = 1; i < 16; i++) {
+ vector.set(i, i < 8 ? 1 : 0, (byte) (i + 1));
+ }
+ vector.setValueCount(16);
+ root.setRowCount(16);
+
+ writer.writeBatch();
+ }
+ writer.close();
+ root.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread
+ }
+ }
+
+ public long bytesWritten() {
+ return writer.bytesWritten();
+ }
+ }
+
+ private final class ReaderThread extends Thread {
+ private int batchesRead = 0;
+ private final ArrowStreamReader reader;
+ private final BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+ private boolean done = false;
+
+ public ReaderThread(ReadableByteChannel sourceChannel)
+ throws IOException {
+ reader = new ArrowStreamReader(sourceChannel, alloc) {
+
+ @Override
+ public boolean loadNextBatch() throws IOException {
+ if (super.loadNextBatch()) {
+ batchesRead++;
+ } else {
+ done = true;
+ return false;
+ }
+ VectorSchemaRoot root = getVectorSchemaRoot();
+ Assert.assertEquals(16, root.getRowCount());
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ Assert.assertEquals((byte) (batchesRead - 1), vector.get(0));
+ for (int i = 1; i < 16; i++) {
+ if (i < 8) {
+ Assert.assertEquals((byte) (i + 1), vector.get(i));
+ } else {
+ Assert.assertTrue(vector.isNull(i));
+ }
+ }
+
+ return true;
+ }
+ };
+ }
+
+ @Override
+ public void run() {
+ try {
+ assertEquals(schema, reader.getVectorSchemaRoot().getSchema());
+ while (!done) {
+ assertTrue(reader.loadNextBatch() != done);
+ }
+ reader.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread
+ }
+ }
+
+ public int getBatchesRead() {
+ return batchesRead;
+ }
+
+ public long bytesRead() {
+ return reader.bytesRead();
+ }
+ }
+
+ // Starts up a producer and consumer thread to read/write batches.
+ @Test
+ public void pipeTest() throws IOException, InterruptedException {
+ final int NUM_BATCHES = 10;
+ Pipe pipe = Pipe.open();
+ WriterThread writer = new WriterThread(NUM_BATCHES, pipe.sink());
+ ReaderThread reader = new ReaderThread(pipe.source());
+
+ writer.start();
+ reader.start();
+ reader.join();
+ writer.join();
+
+ assertEquals(NUM_BATCHES, reader.getBatchesRead());
+ assertEquals(writer.bytesWritten(), reader.bytesRead());
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java
new file mode 100644
index 000000000..f0aa226e2
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java
@@ -0,0 +1,458 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.writer.BaseWriter;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.Validator;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestJSONFile extends BaseFileTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(TestJSONFile.class);
+
+ @Test
+ public void testNoBatches() throws IOException {
+ File file = new File("target/no_batches.json");
+
+ try (BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ BaseWriter.ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ BaseWriter.StructWriter rootWriter = writer.rootAsStruct();
+ rootWriter.integer("int");
+ rootWriter.uInt1("uint1");
+ rootWriter.bigInt("bigInt");
+ rootWriter.float4("float");
+ JsonFileWriter jsonWriter = new JsonFileWriter(file, JsonFileWriter.config().pretty(true));
+ jsonWriter.start(new VectorSchemaRoot(parent.getChild("root")).getSchema(), null);
+ jsonWriter.close();
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+ }
+ }
+
+ @Test
+ public void testWriteRead() throws IOException {
+ File file = new File("target/mytest.json");
+ int count = COUNT;
+
+ // write
+ try (BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeData(count, parent);
+ writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateContent(count, root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadComplexJSON() throws IOException {
+ File file = new File("target/mytest_complex.json");
+ int count = COUNT;
+
+ // write
+ try (
+ BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeComplexData(count, parent);
+ writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator);
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateComplexContent(count, root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteComplexJSON() throws IOException {
+ File file = new File("target/mytest_write_complex.json");
+ int count = COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeComplexData(count, parent);
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateComplexContent(root.getRowCount(), root);
+ writeJSON(file, root, null);
+ }
+ }
+
+ public void writeJSON(File file, VectorSchemaRoot root, DictionaryProvider provider) throws IOException {
+ JsonFileWriter writer = new JsonFileWriter(file, JsonFileWriter.config().pretty(true));
+ writer.start(root.getSchema(), provider);
+ writer.write(root);
+ writer.close();
+ }
+
+
+ @Test
+ public void testWriteReadUnionJSON() throws IOException {
+ File file = new File("target/mytest_write_union.json");
+ int count = COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeUnionData(count, parent);
+ printVectors(parent.getChildrenFromFields());
+
+ try (VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"))) {
+ validateUnionData(count, root);
+ writeJSON(file, root, null);
+
+ // read
+ try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE)) {
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator);
+
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ try (VectorSchemaRoot rootFromJson = reader.read();) {
+ validateUnionData(count, rootFromJson);
+ Validator.compareVectorSchemaRoot(root, rootFromJson);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadDateTimeJSON() throws IOException {
+ File file = new File("target/mytest_datetime.json");
+ int count = COUNT;
+
+ // write
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+
+ writeDateTimeData(count, parent);
+
+ printVectors(parent.getChildrenFromFields());
+
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateDateTimeContent(count, root);
+
+ writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateDateTimeContent(count, root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadDictionaryJSON() throws IOException {
+ File file = new File("target/mytest_dictionary.json");
+
+ // write
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE)
+ ) {
+ MapDictionaryProvider provider = new MapDictionaryProvider();
+
+ try (VectorSchemaRoot root = writeFlatDictionaryData(vectorAllocator, provider)) {
+ printVectors(root.getFieldVectors());
+ validateFlatDictionary(root, provider);
+ writeJSON(file, root, provider);
+ }
+
+ // Need to close dictionary vectors
+ for (long id : provider.getDictionaryIds()) {
+ provider.lookup(id).getVector().close();
+ }
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateFlatDictionary(root, reader);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadNestedDictionaryJSON() throws IOException {
+ File file = new File("target/mytest_dict_nested.json");
+
+ // data being written:
+ // [['foo', 'bar'], ['foo'], ['bar']] -> [[0, 1], [0], [1]]
+
+ // write
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE)
+ ) {
+ MapDictionaryProvider provider = new MapDictionaryProvider();
+
+ try (VectorSchemaRoot root = writeNestedDictionaryData(vectorAllocator, provider)) {
+ printVectors(root.getFieldVectors());
+ validateNestedDictionary(root, provider);
+ writeJSON(file, root, provider);
+ }
+
+ // Need to close dictionary vectors
+ for (long id : provider.getDictionaryIds()) {
+ provider.lookup(id).getVector().close();
+ }
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateNestedDictionary(root, reader);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadDecimalJSON() throws IOException {
+ File file = new File("target/mytest_decimal.json");
+
+ // write
+ try (BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot root = writeDecimalData(vectorAllocator)) {
+ printVectors(root.getFieldVectors());
+ validateDecimalData(root);
+ writeJSON(file, root, null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateDecimalData(root);
+ }
+ }
+ }
+
+ @Test
+ public void testSetStructLength() throws IOException {
+ File file = new File("../../docs/source/format/integration_json_examples/struct.json");
+ if (!file.exists()) {
+ file = new File("../docs/source/format/integration_json_examples/struct.json");
+ }
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ FieldVector vector = root.getVector("struct_nullable");
+ Assert.assertEquals(7, vector.getValueCount());
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadVarBinJSON() throws IOException {
+ File file = new File("target/mytest_varbin.json");
+ int count = COUNT;
+
+ // write
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeVarBinaryData(count, parent);
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateVarBinary(count, root);
+ writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null);
+ }
+
+ // read
+ try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateVarBinary(count, root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadMapJSON() throws IOException {
+ File file = new File("target/mytest_map.json");
+
+ // write
+ try (BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot root = writeMapData(vectorAllocator)) {
+ printVectors(root.getFieldVectors());
+ validateMapData(root);
+ writeJSON(file, root, null);
+ }
+
+ // read
+ try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateMapData(root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadNullJSON() throws IOException {
+ File file = new File("target/mytest_null.json");
+ int valueCount = 10;
+
+ // write
+ try (BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot root = writeNullData(valueCount)) {
+ printVectors(root.getFieldVectors());
+ validateNullData(root, valueCount);
+ writeJSON(file, root, null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateNullData(root, valueCount);
+ }
+ }
+ }
+
+ @Test
+ public void testNoOverFlowWithUINT() {
+ try (final UInt8Vector uInt8Vector = new UInt8Vector("uint8", allocator);
+ final UInt4Vector uInt4Vector = new UInt4Vector("uint4", allocator);
+ final UInt1Vector uInt1Vector = new UInt1Vector("uint1", allocator)) {
+
+ long[] longValues = new long[]{Long.MIN_VALUE, Long.MAX_VALUE, -1L};
+ uInt8Vector.allocateNew(3);
+ uInt8Vector.setValueCount(3);
+ for (int i = 0; i < longValues.length; i++) {
+ uInt8Vector.set(i, longValues[i]);
+ long readValue = uInt8Vector.getObjectNoOverflow(i).longValue();
+ assertEquals(readValue, longValues[i]);
+ }
+
+ int[] intValues = new int[]{Integer.MIN_VALUE, Integer.MAX_VALUE, -1};
+ uInt4Vector.allocateNew(3);
+ uInt4Vector.setValueCount(3);
+ for (int i = 0; i < intValues.length; i++) {
+ uInt4Vector.set(i, intValues[i]);
+ int actualValue = (int) UInt4Vector.getNoOverflow(uInt4Vector.getDataBuffer(), i);
+ assertEquals(intValues[i], actualValue);
+ }
+
+ byte[] byteValues = new byte[]{Byte.MIN_VALUE, Byte.MAX_VALUE, -1};
+ uInt1Vector.allocateNew(3);
+ uInt1Vector.setValueCount(3);
+ for (int i = 0; i < byteValues.length; i++) {
+ uInt1Vector.set(i, byteValues[i]);
+ byte actualValue = (byte) UInt1Vector.getNoOverflow(uInt1Vector.getDataBuffer(), i);
+ assertEquals(byteValues[i], actualValue);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java
new file mode 100644
index 000000000..5f57e90f6
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java
@@ -0,0 +1,628 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiConsumer;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowBuffer;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageMetadataResult;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.AfterClass;
+import org.junit.Assume;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@RunWith(Parameterized.class)
+public class TestRoundTrip extends BaseFileTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(TestRoundTrip.class);
+ private static BufferAllocator allocator;
+ private final String name;
+ private final IpcOption writeOption;
+
+ public TestRoundTrip(String name, IpcOption writeOption) {
+ this.name = name;
+ this.writeOption = writeOption;
+ }
+
+ @Parameterized.Parameters(name = "options = {0}")
+ public static Collection<Object[]> getWriteOption() {
+ final IpcOption legacy = new IpcOption(true, MetadataVersion.V4);
+ final IpcOption version4 = new IpcOption(false, MetadataVersion.V4);
+ return Arrays.asList(
+ new Object[] {"V4Legacy", legacy},
+ new Object[] {"V4", version4},
+ new Object[] {"V5", IpcOption.DEFAULT}
+ );
+ }
+
+ @BeforeClass
+ public static void setUpClass() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @AfterClass
+ public static void tearDownClass() {
+ allocator.close();
+ }
+
+ @Test
+ public void testStruct() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeData(COUNT, parent);
+ roundTrip(
+ new VectorSchemaRoot(parent.getChild("root")),
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {COUNT}, this::validateContent),
+ validateStreamBatches(new int[] {COUNT}, this::validateContent));
+ }
+ }
+
+ @Test
+ public void testComplex() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeComplexData(COUNT, parent);
+ roundTrip(
+ new VectorSchemaRoot(parent.getChild("root")),
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {COUNT}, this::validateComplexContent),
+ validateStreamBatches(new int[] {COUNT}, this::validateComplexContent));
+ }
+ }
+
+ @Test
+ public void testMultipleRecordBatches() throws Exception {
+ int[] counts = {10, 5};
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeData(counts[0], parent);
+ roundTrip(
+ new VectorSchemaRoot(parent.getChild("root")),
+ /* dictionaryProvider */null,
+ (root, writer) -> {
+ writer.start();
+ parent.allocateNew();
+ writeData(counts[0], parent);
+ root.setRowCount(counts[0]);
+ writer.writeBatch();
+
+ parent.allocateNew();
+ // if we write the same data we don't catch that the metadata is stored in the wrong order.
+ writeData(counts[1], parent);
+ root.setRowCount(counts[1]);
+ writer.writeBatch();
+
+ writer.end();
+ },
+ validateFileBatches(counts, this::validateContent),
+ validateStreamBatches(counts, this::validateContent));
+ }
+ }
+
+ @Test
+ public void testUnionV4() throws Exception {
+ Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V4);
+ final File temp = File.createTempFile("arrow-test-" + name + "-", ".arrow");
+ temp.deleteOnExit();
+ final ByteArrayOutputStream memoryStream = new ByteArrayOutputStream();
+
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeUnionData(COUNT, parent);
+ final VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> {
+ try (final FileOutputStream fileStream = new FileOutputStream(temp)) {
+ new ArrowFileWriter(root, null, fileStream.getChannel(), writeOption);
+ new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption);
+ }
+ });
+ assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata"));
+ e = assertThrows(IllegalArgumentException.class, () -> {
+ new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption);
+ });
+ assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata"));
+ }
+ }
+
+ @Test
+ public void testUnionV5() throws Exception {
+ Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V5);
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeUnionData(COUNT, parent);
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateUnionData(COUNT, root);
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {COUNT}, this::validateUnionData),
+ validateStreamBatches(new int[] {COUNT}, this::validateUnionData));
+ }
+ }
+
+ @Test
+ public void testTiny() throws Exception {
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(MessageSerializerTest.testSchema(), allocator)) {
+ root.getFieldVectors().get(0).allocateNew();
+ int count = 16;
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ for (int i = 0; i < count; i++) {
+ vector.set(i, i < 8 ? 1 : 0, (byte) (i + 1));
+ }
+ vector.setValueCount(count);
+ root.setRowCount(count);
+
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {count}, this::validateTinyData),
+ validateStreamBatches(new int[] {count}, this::validateTinyData));
+ }
+ }
+
+ private void validateTinyData(int count, VectorSchemaRoot root) {
+ assertEquals(count, root.getRowCount());
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ for (int i = 0; i < count; i++) {
+ if (i < 8) {
+ assertEquals((byte) (i + 1), vector.get(i));
+ } else {
+ assertTrue(vector.isNull(i));
+ }
+ }
+ }
+
+ @Test
+ public void testMetadata() throws Exception {
+ List<Field> childFields = new ArrayList<>();
+ childFields.add(new Field("varchar-child", new FieldType(true, ArrowType.Utf8.INSTANCE, null, metadata(1)), null));
+ childFields.add(new Field("float-child",
+ new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null, metadata(2)), null));
+ childFields.add(new Field("int-child", new FieldType(false, new ArrowType.Int(32, true), null, metadata(3)), null));
+ childFields.add(new Field("list-child", new FieldType(true, ArrowType.List.INSTANCE, null, metadata(4)),
+ Collections2.asImmutableList(new Field("l1", FieldType.nullable(new ArrowType.Int(16, true)), null))));
+ Field field = new Field("meta", new FieldType(true, ArrowType.Struct.INSTANCE, null, metadata(0)), childFields);
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("s1", "v1");
+ metadata.put("s2", "v2");
+ Schema originalSchema = new Schema(Collections2.asImmutableList(field), metadata);
+ assertEquals(metadata, originalSchema.getCustomMetadata());
+
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector vector = (StructVector) field.createVector(originalVectorAllocator)) {
+ vector.allocateNewSafe();
+ vector.setValueCount(0);
+
+ List<FieldVector> vectors = Collections2.asImmutableList(vector);
+ VectorSchemaRoot root = new VectorSchemaRoot(originalSchema, vectors, 0);
+
+ BiConsumer<Integer, VectorSchemaRoot> validate = (count, readRoot) -> {
+ Schema schema = readRoot.getSchema();
+ assertEquals(originalSchema, schema);
+ assertEquals(originalSchema.getCustomMetadata(), schema.getCustomMetadata());
+ Field top = schema.getFields().get(0);
+ assertEquals(metadata(0), top.getMetadata());
+ for (int i = 0; i < 4; i++) {
+ assertEquals(metadata(i + 1), top.getChildren().get(i).getMetadata());
+ }
+ };
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {0}, validate),
+ validateStreamBatches(new int[] {0}, validate));
+ }
+ }
+
+ private Map<String, String> metadata(int i) {
+ Map<String, String> map = new HashMap<>();
+ map.put("k_" + i, "v_" + i);
+ map.put("k2_" + i, "v2_" + i);
+ return Collections.unmodifiableMap(map);
+ }
+
+ @Test
+ public void testFlatDictionary() throws Exception {
+ AtomicInteger numDictionaryBlocksWritten = new AtomicInteger();
+ MapDictionaryProvider provider = new MapDictionaryProvider();
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final VectorSchemaRoot root = writeFlatDictionaryData(originalVectorAllocator, provider)) {
+ roundTrip(
+ root,
+ provider,
+ (ignored, writer) -> {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ if (writer instanceof ArrowFileWriter) {
+ numDictionaryBlocksWritten.set(((ArrowFileWriter) writer).getDictionaryBlocks().size());
+ }
+ },
+ (fileReader) -> {
+ VectorSchemaRoot readRoot = fileReader.getVectorSchemaRoot();
+ Schema schema = readRoot.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ assertTrue(fileReader.loadNextBatch());
+ validateFlatDictionary(readRoot, fileReader);
+ assertEquals(numDictionaryBlocksWritten.get(), fileReader.getDictionaryBlocks().size());
+ },
+ (streamReader) -> {
+ VectorSchemaRoot readRoot = streamReader.getVectorSchemaRoot();
+ Schema schema = readRoot.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ assertTrue(streamReader.loadNextBatch());
+ validateFlatDictionary(readRoot, streamReader);
+ });
+
+ // Need to close dictionary vectors
+ for (long id : provider.getDictionaryIds()) {
+ provider.lookup(id).getVector().close();
+ }
+ }
+ }
+
+ @Test
+ public void testNestedDictionary() throws Exception {
+ AtomicInteger numDictionaryBlocksWritten = new AtomicInteger();
+ MapDictionaryProvider provider = new MapDictionaryProvider();
+ // data being written:
+ // [['foo', 'bar'], ['foo'], ['bar']] -> [[0, 1], [0], [1]]
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final VectorSchemaRoot root = writeNestedDictionaryData(originalVectorAllocator, provider)) {
+ CheckedConsumer<ArrowReader> validateDictionary = (streamReader) -> {
+ VectorSchemaRoot readRoot = streamReader.getVectorSchemaRoot();
+ Schema schema = readRoot.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ assertTrue(streamReader.loadNextBatch());
+ validateNestedDictionary(readRoot, streamReader);
+ };
+ roundTrip(
+ root,
+ provider,
+ (ignored, writer) -> {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ if (writer instanceof ArrowFileWriter) {
+ numDictionaryBlocksWritten.set(((ArrowFileWriter) writer).getDictionaryBlocks().size());
+ }
+ },
+ validateDictionary,
+ validateDictionary);
+
+ // Need to close dictionary vectors
+ for (long id : provider.getDictionaryIds()) {
+ provider.lookup(id).getVector().close();
+ }
+ }
+ }
+
+ @Test
+ public void testFixedSizeBinary() throws Exception {
+ final int count = 10;
+ final int typeWidth = 11;
+ byte[][] byteValues = new byte[count][typeWidth];
+ for (int i = 0; i < count; i++) {
+ for (int j = 0; j < typeWidth; j++) {
+ byteValues[i][j] = ((byte) i);
+ }
+ }
+
+ BiConsumer<Integer, VectorSchemaRoot> validator = (expectedCount, root) -> {
+ for (int i = 0; i < expectedCount; i++) {
+ assertArrayEquals(byteValues[i], ((byte[]) root.getVector("fixed-binary").getObject(i)));
+ }
+ };
+
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ FixedSizeBinaryVector fixedSizeBinaryVector = parent.addOrGet("fixed-binary",
+ FieldType.nullable(new ArrowType.FixedSizeBinary(typeWidth)), FixedSizeBinaryVector.class);
+ parent.allocateNew();
+ for (int i = 0; i < count; i++) {
+ fixedSizeBinaryVector.set(i, byteValues[i]);
+ }
+ parent.setValueCount(count);
+
+ roundTrip(
+ new VectorSchemaRoot(parent),
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {count}, validator),
+ validateStreamBatches(new int[] {count}, validator));
+ }
+ }
+
+ @Test
+ public void testFixedSizeList() throws Exception {
+ BiConsumer<Integer, VectorSchemaRoot> validator = (expectedCount, root) -> {
+ for (int i = 0; i < expectedCount; i++) {
+ assertEquals(Collections2.asImmutableList(i + 0.1f, i + 10.1f), root.getVector("float-pairs")
+ .getObject(i));
+ assertEquals(i, root.getVector("ints").getObject(i));
+ }
+ };
+
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ FixedSizeListVector tuples = parent.addOrGet("float-pairs",
+ FieldType.nullable(new ArrowType.FixedSizeList(2)), FixedSizeListVector.class);
+ Float4Vector floats = (Float4Vector) tuples.addOrGetVector(FieldType.nullable(Types.MinorType.FLOAT4.getType()))
+ .getVector();
+ IntVector ints = parent.addOrGet("ints", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ parent.allocateNew();
+ for (int i = 0; i < COUNT; i++) {
+ tuples.setNotNull(i);
+ floats.set(i * 2, i + 0.1f);
+ floats.set(i * 2 + 1, i + 10.1f);
+ ints.set(i, i);
+ }
+ parent.setValueCount(COUNT);
+
+ roundTrip(
+ new VectorSchemaRoot(parent),
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {COUNT}, validator),
+ validateStreamBatches(new int[] {COUNT}, validator));
+ }
+ }
+
+ @Test
+ public void testVarBinary() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeVarBinaryData(COUNT, parent);
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateVarBinary(COUNT, root);
+
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[]{COUNT}, this::validateVarBinary),
+ validateStreamBatches(new int[]{COUNT}, this::validateVarBinary));
+ }
+ }
+
+ @Test
+ public void testReadWriteMultipleBatches() throws IOException {
+ File file = new File("target/mytest_nulls_multibatch.arrow");
+ int numBlocksWritten = 0;
+
+ try (IntVector vector = new IntVector("foo", allocator);) {
+ Schema schema = new Schema(Collections.singletonList(vector.getField()));
+ try (FileOutputStream fileOutputStream = new FileOutputStream(file);
+ VectorSchemaRoot root =
+ new VectorSchemaRoot(schema, Collections.singletonList((FieldVector) vector), vector.getValueCount());
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, fileOutputStream.getChannel(), writeOption)) {
+ writeBatchData(writer, vector, root);
+ numBlocksWritten = writer.getRecordBlocks().size();
+ }
+ }
+
+ try (FileInputStream fileInputStream = new FileInputStream(file);
+ ArrowFileReader reader = new ArrowFileReader(fileInputStream.getChannel(), allocator);) {
+ IntVector vector = (IntVector) reader.getVectorSchemaRoot().getFieldVectors().get(0);
+ validateBatchData(reader, vector);
+ assertEquals(numBlocksWritten, reader.getRecordBlocks().size());
+ }
+ }
+
+ @Test
+ public void testMap() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final VectorSchemaRoot root = writeMapData(originalVectorAllocator)) {
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[]{root.getRowCount()}, (count, readRoot) -> validateMapData(readRoot)),
+ validateStreamBatches(new int[]{root.getRowCount()}, (count, readRoot) -> validateMapData(readRoot)));
+ }
+ }
+
+ @Test
+ public void testListAsMap() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final VectorSchemaRoot root = writeListAsMapData(originalVectorAllocator)) {
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[]{root.getRowCount()}, (count, readRoot) -> validateListAsMapData(readRoot)),
+ validateStreamBatches(new int[]{root.getRowCount()}, (count, readRoot) -> validateListAsMapData(readRoot)));
+ }
+ }
+
+ // Generic test helpers
+
+ private static void writeSingleBatch(VectorSchemaRoot root, ArrowWriter writer) throws IOException {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ }
+
+ private CheckedConsumer<ArrowFileReader> validateFileBatches(
+ int[] counts, BiConsumer<Integer, VectorSchemaRoot> validator) {
+ return (arrowReader) -> {
+ VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
+ VectorUnloader unloader = new VectorUnloader(root);
+ Schema schema = root.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ int i = 0;
+ List<ArrowBlock> recordBatches = arrowReader.getRecordBlocks();
+ assertEquals(counts.length, recordBatches.size());
+ long previousOffset = 0;
+ for (ArrowBlock rbBlock : recordBatches) {
+ assertTrue(rbBlock.getOffset() + " > " + previousOffset, rbBlock.getOffset() > previousOffset);
+ previousOffset = rbBlock.getOffset();
+ arrowReader.loadRecordBatch(rbBlock);
+ assertEquals("RB #" + i, counts[i], root.getRowCount());
+ validator.accept(counts[i], root);
+ try (final ArrowRecordBatch batch = unloader.getRecordBatch()) {
+ List<ArrowBuffer> buffersLayout = batch.getBuffersLayout();
+ for (ArrowBuffer arrowBuffer : buffersLayout) {
+ assertEquals(0, arrowBuffer.getOffset() % 8);
+ }
+ }
+ ++i;
+ }
+ };
+ }
+
+ private CheckedConsumer<ArrowStreamReader> validateStreamBatches(
+ int[] counts, BiConsumer<Integer, VectorSchemaRoot> validator) {
+ return (arrowReader) -> {
+ VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
+ VectorUnloader unloader = new VectorUnloader(root);
+ Schema schema = root.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ int i = 0;
+
+ for (int n = 0; n < counts.length; n++) {
+ assertTrue(arrowReader.loadNextBatch());
+ assertEquals("RB #" + i, counts[i], root.getRowCount());
+ validator.accept(counts[i], root);
+ try (final ArrowRecordBatch batch = unloader.getRecordBatch()) {
+ final List<ArrowBuffer> buffersLayout = batch.getBuffersLayout();
+ for (ArrowBuffer arrowBuffer : buffersLayout) {
+ assertEquals(0, arrowBuffer.getOffset() % 8);
+ }
+ }
+ ++i;
+ }
+ assertFalse(arrowReader.loadNextBatch());
+ };
+ }
+
+ @FunctionalInterface
+ interface CheckedConsumer<T> {
+ void accept(T t) throws Exception;
+ }
+
+ @FunctionalInterface
+ interface CheckedBiConsumer<T, U> {
+ void accept(T t, U u) throws Exception;
+ }
+
+ private void roundTrip(VectorSchemaRoot root, DictionaryProvider provider,
+ CheckedBiConsumer<VectorSchemaRoot, ArrowWriter> writer,
+ CheckedConsumer<? super ArrowFileReader> fileValidator,
+ CheckedConsumer<? super ArrowStreamReader> streamValidator) throws Exception {
+ final File temp = File.createTempFile("arrow-test-" + name + "-", ".arrow");
+ temp.deleteOnExit();
+ final ByteArrayOutputStream memoryStream = new ByteArrayOutputStream();
+ final Map<String, String> metadata = new HashMap<>();
+ metadata.put("foo", "bar");
+ try (final FileOutputStream fileStream = new FileOutputStream(temp);
+ final ArrowFileWriter fileWriter =
+ new ArrowFileWriter(root, provider, fileStream.getChannel(), metadata, writeOption);
+ final ArrowStreamWriter streamWriter =
+ new ArrowStreamWriter(root, provider, Channels.newChannel(memoryStream), writeOption)) {
+ writer.accept(root, fileWriter);
+ writer.accept(root, streamWriter);
+ }
+
+ MessageMetadataResult metadataResult = MessageSerializer.readMessage(
+ new ReadChannel(Channels.newChannel(new ByteArrayInputStream(memoryStream.toByteArray()))));
+ assertNotNull(metadataResult);
+ assertEquals(writeOption.metadataVersion.toFlatbufID(), metadataResult.getMessage().version());
+
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, allocator.getLimit());
+ FileInputStream fileInputStream = new FileInputStream(temp);
+ ByteArrayInputStream inputStream = new ByteArrayInputStream(memoryStream.toByteArray());
+ ArrowFileReader fileReader = new ArrowFileReader(fileInputStream.getChannel(), readerAllocator);
+ ArrowStreamReader streamReader = new ArrowStreamReader(inputStream, readerAllocator)) {
+ fileValidator.accept(fileReader);
+ streamValidator.accept(streamReader);
+ assertEquals(writeOption.metadataVersion, fileReader.getFooter().getMetadataVersion());
+ assertEquals(metadata, fileReader.getMetaData());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java
new file mode 100644
index 000000000..6aa7a0c6d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Map;
+import java.util.function.ToIntBiFunction;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test the round-trip of dictionary encoding,
+ * with unsigned integer as indices.
+ */
+@RunWith(Parameterized.class)
+public class TestUIntDictionaryRoundTrip {
+
+ private final boolean streamMode;
+
+ public TestUIntDictionaryRoundTrip(boolean streamMode) {
+ this.streamMode = streamMode;
+ }
+
+ private BufferAllocator allocator;
+
+ private DictionaryProvider.MapDictionaryProvider dictionaryProvider;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ dictionaryProvider = new DictionaryProvider.MapDictionaryProvider();
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ private byte[] writeData(FieldVector encodedVector) throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ VectorSchemaRoot root =
+ new VectorSchemaRoot(
+ Arrays.asList(encodedVector.getField()), Arrays.asList(encodedVector), encodedVector.getValueCount());
+ try (ArrowWriter writer = streamMode ?
+ new ArrowStreamWriter(root, dictionaryProvider, out) :
+ new ArrowFileWriter(root, dictionaryProvider, Channels.newChannel(out))) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ return out.toByteArray();
+ }
+ }
+
+ private void readData(
+ byte[] data,
+ Field expectedField,
+ ToIntBiFunction<ValueVector, Integer> valGetter,
+ long dictionaryID,
+ int[] expectedIndices,
+ String[] expectedDictItems) throws IOException {
+ try (ArrowReader reader = streamMode ?
+ new ArrowStreamReader(new ByteArrayInputStream(data), allocator) :
+ new ArrowFileReader(new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(data)), allocator)) {
+
+ // verify schema
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(1, readSchema.getFields().size());
+ assertEquals(expectedField, readSchema.getFields().get(0));
+
+ // verify vector schema root
+ assertTrue(reader.loadNextBatch());
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+
+ assertEquals(1, root.getFieldVectors().size());
+ ValueVector encodedVector = root.getVector(0);
+ assertEquals(expectedIndices.length, encodedVector.getValueCount());
+
+ for (int i = 0; i < expectedIndices.length; i++) {
+ assertEquals(expectedIndices[i], valGetter.applyAsInt(encodedVector, i));
+ }
+
+ // verify dictionary
+ Map<Long, Dictionary> dictVectors = reader.getDictionaryVectors();
+ assertEquals(1, dictVectors.size());
+ Dictionary dictionary = dictVectors.get(dictionaryID);
+ assertNotNull(dictionary);
+
+ assertTrue(dictionary.getVector() instanceof VarCharVector);
+ VarCharVector dictVector = (VarCharVector) dictionary.getVector();
+ assertEquals(expectedDictItems.length, dictVector.getValueCount());
+ for (int i = 0; i < dictVector.getValueCount(); i++) {
+ assertArrayEquals(expectedDictItems[i].getBytes(), dictVector.get(i));
+ }
+ }
+ }
+
+ private ValueVector createEncodedVector(int bitWidth, VarCharVector dictionaryVector) {
+ final DictionaryEncoding dictionaryEncoding =
+ new DictionaryEncoding(bitWidth, false, new ArrowType.Int(bitWidth, false));
+ Dictionary dictionary = new Dictionary(dictionaryVector, dictionaryEncoding);
+ dictionaryProvider.put(dictionary);
+
+ final FieldType type =
+ new FieldType(true, dictionaryEncoding.getIndexType(), dictionaryEncoding, null);
+ final Field field = new Field("encoded", type, null);
+ return field.createVector(allocator);
+ }
+
+ @Test
+ public void testUInt1RoundTrip() throws IOException {
+ final int vectorLength = UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK;
+ try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
+ UInt1Vector encodedVector1 = (UInt1Vector) createEncodedVector(8, dictionaryVector)) {
+ int[] indices = new int[vectorLength];
+ String[] dictionaryItems = new String[vectorLength];
+ for (int i = 0; i < vectorLength; i++) {
+ encodedVector1.setSafe(i, (byte) i);
+ indices[i] = i;
+ dictionaryItems[i] = String.valueOf(i);
+ }
+ encodedVector1.setValueCount(vectorLength);
+ setVector(dictionaryVector, dictionaryItems);
+ byte[] data = writeData(encodedVector1);
+ readData(
+ data, encodedVector1.getField(), (vector, index) -> (int) ((UInt1Vector) vector).getValueAsLong(index),
+ 8L, indices, dictionaryItems);
+ }
+ }
+
+ @Test
+ public void testUInt2RoundTrip() throws IOException {
+ try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
+ UInt2Vector encodedVector2 = (UInt2Vector) createEncodedVector(16, dictionaryVector)) {
+ int[] indices = new int[]{1, 3, 5, 7, 9, UInt2Vector.MAX_UINT2};
+ String[] dictItems = new String[UInt2Vector.MAX_UINT2];
+ for (int i = 0; i < UInt2Vector.MAX_UINT2; i++) {
+ dictItems[i] = String.valueOf(i);
+ }
+
+ setVector(encodedVector2, (char) 1, (char) 3, (char) 5, (char) 7, (char) 9, UInt2Vector.MAX_UINT2);
+ setVector(dictionaryVector, dictItems);
+
+ byte[] data = writeData(encodedVector2);
+ readData(data, encodedVector2.getField(), (vector, index) -> (int) ((UInt2Vector) vector).getValueAsLong(index),
+ 16L, indices, dictItems);
+ }
+ }
+
+ @Test
+ public void testUInt4RoundTrip() throws IOException {
+ final int dictLength = 10;
+ try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
+ UInt4Vector encodedVector4 = (UInt4Vector) createEncodedVector(32, dictionaryVector)) {
+ int[] indices = new int[]{1, 3, 5, 7, 9};
+ String[] dictItems = new String[dictLength];
+ for (int i = 0; i < dictLength; i++) {
+ dictItems[i] = String.valueOf(i);
+ }
+
+ setVector(encodedVector4, 1, 3, 5, 7, 9);
+ setVector(dictionaryVector, dictItems);
+
+ setVector(encodedVector4, 1, 3, 5, 7, 9);
+ byte[] data = writeData(encodedVector4);
+ readData(data, encodedVector4.getField(), (vector, index) -> (int) ((UInt4Vector) vector).getValueAsLong(index),
+ 32L, indices, dictItems);
+ }
+ }
+
+ @Test
+ public void testUInt8RoundTrip() throws IOException {
+ final int dictLength = 10;
+ try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
+ UInt8Vector encodedVector8 = (UInt8Vector) createEncodedVector(64, dictionaryVector)) {
+ int[] indices = new int[]{1, 3, 5, 7, 9};
+ String[] dictItems = new String[dictLength];
+ for (int i = 0; i < dictLength; i++) {
+ dictItems[i] = String.valueOf(i);
+ }
+
+ setVector(encodedVector8, 1L, 3L, 5L, 7L, 9L);
+ setVector(dictionaryVector, dictItems);
+
+ byte[] data = writeData(encodedVector8);
+ readData(data, encodedVector8.getField(), (vector, index) -> (int) ((UInt8Vector) vector).getValueAsLong(index),
+ 64L, indices, dictItems);
+ }
+ }
+
+ @Parameterized.Parameters(name = "stream mode = {0}")
+ public static Collection<Object[]> getRepeat() {
+ return Arrays.asList(
+ new Object[]{true},
+ new Object[]{false}
+ );
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java
new file mode 100644
index 000000000..ee5361547
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+
+import org.junit.Test;
+
+public class TestMessageMetadataResult {
+
+ @Test
+ public void getMessageLength_returnsConstructValue() {
+ // This API is used by spark.
+ MessageMetadataResult result = new MessageMetadataResult(1, ByteBuffer.allocate(0),
+ new org.apache.arrow.flatbuf.Message());
+ assertEquals(result.getMessageLength(), 1);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
new file mode 100644
index 000000000..5cc0d0800
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.pojo;
+
+import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
+import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.List;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Test conversion between Flatbuf and Pojo field representations.
+ */
+public class TestConvert {
+
+ @Test
+ public void simple() {
+ Field initialField = new Field("a", FieldType.nullable(new Int(32, true)), null);
+ run(initialField);
+ }
+
+ @Test
+ public void complex() {
+ java.util.List<Field> children = new ArrayList<>();
+ children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));
+ children.add(new Field("child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList()));
+
+ Field initialField = new Field("a", FieldType.nullable(Struct.INSTANCE), children);
+ run(initialField);
+ }
+
+ @Test
+ public void list() throws Exception {
+ java.util.List<Field> children = new ArrayList<>();
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ ListVector writeVector = ListVector.empty("list", allocator);
+ FixedSizeListVector writeFixedVector = FixedSizeListVector.empty("fixedlist", 5, allocator)) {
+ Field listVectorField = writeVector.getField();
+ children.add(listVectorField);
+ Field listFixedVectorField = writeFixedVector.getField();
+ children.add(listFixedVectorField);
+ }
+
+ Field initialField = new Field("a", FieldType.nullable(Struct.INSTANCE), children);
+ java.util.List<Field> parent = new ArrayList<>();
+ parent.add(initialField);
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ builder.finish(initialField.getField(builder));
+ org.apache.arrow.flatbuf.Field flatBufField = org.apache.arrow.flatbuf.Field.getRootAsField(builder.dataBuffer());
+ Field finalField = Field.convertField(flatBufField);
+ assertEquals(initialField, finalField);
+ assertFalse(finalField.toString().contains("[DEFAULT]"));
+
+ Schema initialSchema = new Schema(parent);
+ String jsonSchema = initialSchema.toJson();
+ String modifiedSchema = jsonSchema.replace("$data$", "[DEFAULT]");
+
+ Schema tempSchema = Schema.fromJSON(modifiedSchema);
+ FlatBufferBuilder schemaBuilder = new FlatBufferBuilder();
+ org.apache.arrow.vector.types.pojo.Schema schema =
+ new org.apache.arrow.vector.types.pojo.Schema(tempSchema.getFields());
+ schemaBuilder.finish(schema.getSchema(schemaBuilder));
+ Schema finalSchema = Schema.deserialize(ByteBuffer.wrap(schemaBuilder.sizedByteArray()));
+ assertFalse(finalSchema.toString().contains("[DEFAULT]"));
+ }
+
+ @Test
+ public void schema() {
+ java.util.List<Field> children = new ArrayList<>();
+ children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));
+ children.add(new Field("child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList()));
+ Schema initialSchema = new Schema(children);
+ run(initialSchema);
+ }
+
+ @Test
+ public void schemaMetadata() {
+ java.util.List<Field> children = new ArrayList<>();
+ children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));
+ children.add(new Field("child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList()));
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+ metadata.put("key2", "value2");
+ Schema initialSchema = new Schema(children, metadata);
+ run(initialSchema);
+ }
+
+ @Test
+ public void nestedSchema() {
+ java.util.List<Field> children = new ArrayList<>();
+ children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));
+ children.add(new Field("child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList()));
+ children.add(new Field("child3", FieldType.nullable(new Struct()), Collections2.asImmutableList(
+ new Field("child3.1", FieldType.nullable(Utf8.INSTANCE), null),
+ new Field("child3.2", FieldType.nullable(new FloatingPoint(DOUBLE)), Collections.emptyList())
+ )));
+ children.add(new Field("child4", FieldType.nullable(new List()), Collections2.asImmutableList(
+ new Field("child4.1", FieldType.nullable(Utf8.INSTANCE), null)
+ )));
+ children.add(new Field("child5", FieldType.nullable(
+ new Union(UnionMode.Sparse, new int[] {MinorType.TIMESTAMPMILLI.ordinal(), MinorType.FLOAT8.ordinal()})),
+ Collections2.asImmutableList(
+ new Field("child5.1", FieldType.nullable(new Timestamp(TimeUnit.MILLISECOND, null)), null),
+ new Field("child5.2", FieldType.nullable(new FloatingPoint(DOUBLE)), Collections.emptyList()),
+ new Field("child5.3", FieldType.nullable(new Timestamp(TimeUnit.MILLISECOND, "UTC")), null)
+ )));
+ Schema initialSchema = new Schema(children);
+ run(initialSchema);
+ }
+
+ private void run(Field initialField) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ builder.finish(initialField.getField(builder));
+ org.apache.arrow.flatbuf.Field flatBufField = org.apache.arrow.flatbuf.Field.getRootAsField(builder.dataBuffer());
+ Field finalField = Field.convertField(flatBufField);
+ assertEquals(initialField, finalField);
+ }
+
+ private void run(Schema initialSchema) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ builder.finish(initialSchema.getSchema(builder));
+ org.apache.arrow.flatbuf.Schema flatBufSchema =
+ org.apache.arrow.flatbuf.Schema.getRootAsSchema(builder.dataBuffer());
+ Schema finalSchema = Schema.convertSchema(flatBufSchema);
+ assertEquals(initialSchema, finalSchema);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java
new file mode 100644
index 000000000..4b1094d28
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.testing;
+
+import java.util.Random;
+import java.util.function.Supplier;
+
+/**
+ * Utility for generating random data.
+ */
+public class RandomDataGenerator {
+
+ static final Random random = new Random(0);
+
+ public static final Supplier<Byte> TINY_INT_GENERATOR = () -> (byte) random.nextInt();
+
+ public static final Supplier<Short> SMALL_INT_GENERATOR = () -> (short) random.nextInt();
+
+ public static final Supplier<Integer> INT_GENERATOR = () -> random.nextInt();
+
+ public static final Supplier<Long> LONG_GENERATOR = () -> random.nextLong();
+
+ public static final Supplier<Float> FLOAT_GENERATOR = () -> random.nextFloat();
+
+ public static final Supplier<Double> DOUBLE_GENERATOR = () -> random.nextDouble();
+
+ private RandomDataGenerator() {
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java
new file mode 100644
index 000000000..f5d15e2c6
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java
@@ -0,0 +1,604 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.testing;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValueVectorPopulator {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testPopulateBigIntVector() {
+ try (final BigIntVector vector1 = new BigIntVector("vector", allocator);
+ final BigIntVector vector2 = new BigIntVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateBitVector() {
+ try (final BitVector vector1 = new BitVector("vector", allocator);
+ final BitVector vector2 = new BitVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i > 5 ? 0 : 1);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1, null, 1, null, 0, null, 0, null, 0);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateDateDayVector() {
+ try (final DateDayVector vector1 = new DateDayVector("vector", allocator);
+ final DateDayVector vector2 = new DateDayVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 10, null, 30, null, 50, null, 70, null, 90);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateDateMilliVector() {
+ try (final DateMilliVector vector1 = new DateMilliVector("vector", allocator);
+ final DateMilliVector vector2 = new DateMilliVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 1000);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1000L, null, 3000L, null, 5000L, null, 7000L, null, 9000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateDecimalVector() {
+ try (final DecimalVector vector1 = new DecimalVector("vector", allocator, 10, 3);
+ final DecimalVector vector2 = new DecimalVector("vector", allocator, 10, 3)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateDurationVector() {
+ final FieldType fieldType = FieldType.nullable(new ArrowType.Duration(TimeUnit.SECOND));
+ try (final DurationVector vector1 = new DurationVector("vector", fieldType, allocator);
+ final DurationVector vector2 = new DurationVector("vector", fieldType, allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L);
+
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateFixedSizeBinaryVector() {
+ try (final FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("vector", allocator, 5);
+ final FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("vector", allocator, 5)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, ("test" + i).getBytes());
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, "test1".getBytes(), null, "test3".getBytes(), null, "test5".getBytes(), null,
+ "test7".getBytes(), null, "test9".getBytes());
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateFloat4Vector() {
+ try (final Float4Vector vector1 = new Float4Vector("vector", allocator);
+ final Float4Vector vector2 = new Float4Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 1f, null, 3f, null, 5f, null, 7f, null, 9f);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateFloat8Vector() {
+ try (final Float8Vector vector1 = new Float8Vector("vector", allocator);
+ final Float8Vector vector2 = new Float8Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 1d, null, 3d, null, 5d, null, 7d, null, 9d);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateIntVector() {
+ try (final IntVector vector1 = new IntVector("vector", allocator);
+ final IntVector vector2 = new IntVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ ValueVectorDataPopulator.setVector(vector2, null, 1, null, 3, null, 5, null, 7, null, 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateSmallIntVector() {
+ try (final SmallIntVector vector1 = new SmallIntVector("vector", allocator);
+ final SmallIntVector vector2 = new SmallIntVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ ValueVectorDataPopulator.setVector(vector2, null, (short) 1, null, (short) 3, null, (short) 5,
+ null, (short) 7, null, (short) 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateIntervalDayVector() {
+ try (final IntervalYearVector vector1 = new IntervalYearVector("vector", allocator);
+ final IntervalYearVector vector2 = new IntervalYearVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ ValueVectorDataPopulator.setVector(vector2, null, 1, null, 3, null, 5, null, 7, null, 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeMicroVector() {
+ try (final TimeMicroVector vector1 = new TimeMicroVector("vector", allocator);
+ final TimeMicroVector vector2 = new TimeMicroVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeMilliVector() {
+ try (final TimeMilliVector vector1 = new TimeMilliVector("vector", allocator);
+ final TimeMilliVector vector2 = new TimeMilliVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 100);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 100, null, 300, null, 500, null, 700, null, 900);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeNanoVector() {
+ try (final TimeNanoVector vector1 = new TimeNanoVector("vector", allocator);
+ final TimeNanoVector vector2 = new TimeNanoVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeSecVector() {
+ try (final TimeSecVector vector1 = new TimeSecVector("vector", allocator);
+ final TimeSecVector vector2 = new TimeSecVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 100);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 100, null, 300, null, 500, null, 700, null, 900);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeStampMicroVector() {
+ try (final TimeStampMicroVector vector1 = new TimeStampMicroVector("vector", allocator);
+ final TimeStampMicroVector vector2 = new TimeStampMicroVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeStampMilliVector() {
+ try (final TimeStampMilliVector vector1 = new TimeStampMilliVector("vector", allocator);
+ final TimeStampMilliVector vector2 = new TimeStampMilliVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeStampNanoVector() {
+ try (final TimeStampNanoVector vector1 = new TimeStampNanoVector("vector", allocator);
+ final TimeStampNanoVector vector2 = new TimeStampNanoVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeStampSecVector() {
+ try (final TimeStampSecVector vector1 = new TimeStampSecVector("vector", allocator);
+ final TimeStampSecVector vector2 = new TimeStampSecVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 100);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 100L, null, 300L, null, 500L, null, 700L, null, 900L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTinyIntVector() {
+ try (final TinyIntVector vector1 = new TinyIntVector("vector", allocator);
+ final TinyIntVector vector2 = new TinyIntVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, (byte) 1, null, (byte) 3, null, (byte) 5, null, (byte) 7, null, (byte) 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateUInt1Vector() {
+ try (final UInt1Vector vector1 = new UInt1Vector("vector", allocator);
+ final UInt1Vector vector2 = new UInt1Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, (byte) 1, null, (byte) 3, null, (byte) 5, null, (byte) 7, null, (byte) 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateUInt2Vector() {
+ try (final UInt2Vector vector1 = new UInt2Vector("vector", allocator);
+ final UInt2Vector vector2 = new UInt2Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, (char) 1, null, (char) 3, null, (char) 5, null, (char) 7, null, (char) 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateUInt4Vector() {
+ try (final UInt4Vector vector1 = new UInt4Vector("vector", allocator);
+ final UInt4Vector vector2 = new UInt4Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 1, null, 3, null, 5, null, 7, null, 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateUInt8Vector() {
+ try (final UInt8Vector vector1 = new UInt8Vector("vector", allocator);
+ final UInt8Vector vector2 = new UInt8Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateVarBinaryVector() {
+ try (final VarBinaryVector vector1 = new VarBinaryVector("vector", allocator);
+ final VarBinaryVector vector2 = new VarBinaryVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, ("test" + i).getBytes());
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, "test1".getBytes(), null, "test3".getBytes(), null, "test5".getBytes(), null,
+ "test7".getBytes(), null, "test9".getBytes());
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateVarCharVector() {
+ try (final VarCharVector vector1 = new VarCharVector("vector", allocator);
+ final VarCharVector vector2 = new VarCharVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, ("test" + i).getBytes());
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, "test1", null, "test3", null, "test5", null, "test7", null, "test9");
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
new file mode 100644
index 000000000..15d6a5cf9
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
@@ -0,0 +1,708 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.testing;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.holders.IntervalDayHolder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Utility for populating {@link org.apache.arrow.vector.ValueVector}.
+ */
+public class ValueVectorDataPopulator {
+
+ private ValueVectorDataPopulator(){}
+
+ /**
+ * Populate values for BigIntVector.
+ */
+ public static void setVector(BigIntVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for BitVector.
+ */
+ public static void setVector(BitVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for DateDayVector.
+ * @param values numbers of days since UNIX epoch
+ */
+ public static void setVector(DateDayVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for DateMilliVector.
+ * @param values numbers of milliseconds since UNIX epoch
+ */
+ public static void setVector(DateMilliVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for DecimalVector.
+ */
+ public static void setVector(DecimalVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for DurationVector.
+ * @param values values of elapsed time in either seconds, milliseconds, microseconds or nanoseconds.
+ */
+ public static void setVector(DurationVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for FixedSizeBinaryVector.
+ */
+ public static void setVector(FixedSizeBinaryVector vector, byte[]... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for Float4Vector.
+ */
+ public static void setVector(Float4Vector vector, Float... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for Float8Vector.
+ */
+ public static void setVector(Float8Vector vector, Double... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for IntVector.
+ */
+ public static void setVector(IntVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for IntervalDayVector.
+ * @param values holders witch holds days and milliseconds values which represents interval in SQL style.
+ */
+ public static void setVector(IntervalDayVector vector, IntervalDayHolder... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i].days, values[i].milliseconds);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for IntervalYearVector.
+ * @param values total month intervals in SQL style.
+ */
+ public static void setVector(IntervalYearVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for SmallIntVector.
+ */
+ public static void setVector(SmallIntVector vector, Short... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeMicroVector.
+ * @param values numbers of microseconds since UNIX epoch
+ */
+ public static void setVector(TimeMicroVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeMicroVector.
+ * @param values numbers of milliseconds since UNIX epoch
+ */
+ public static void setVector(TimeMilliVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeNanoVector.
+ * @param values numbers of nanoseconds since UNIX epoch
+ */
+ public static void setVector(TimeNanoVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeSecVector.
+ * @param values numbers of seconds since UNIX epoch
+ */
+ public static void setVector(TimeSecVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampMicroTZVector.
+ * @param values numbers of microseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampMicroTZVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampMicroVector.
+ * @param values numbers of microseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampMicroVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampMilliTZVector.
+ * @param values numbers of milliseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampMilliTZVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampMilliVector.
+ * @param values numbers of milliseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampMilliVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampNanoTZVector.
+ * @param values numbers of nanoseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampNanoTZVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampNanoVector.
+ * @param values numbers of nanoseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampNanoVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampSecTZVector.
+ * @param values numbers of seconds since UNIX epoch
+ */
+ public static void setVector(TimeStampSecTZVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampSecVector.
+ * @param values numbers of seconds since UNIX epoch
+ */
+ public static void setVector(TimeStampSecVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TinyIntVector.
+ */
+ public static void setVector(TinyIntVector vector, Byte... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for UInt1Vector.
+ */
+ public static void setVector(UInt1Vector vector, Byte... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for UInt2Vector.
+ */
+ public static void setVector(UInt2Vector vector, Character... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for UInt4Vector.
+ */
+ public static void setVector(UInt4Vector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for UInt8Vector.
+ */
+ public static void setVector(UInt8Vector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for VarBinaryVector.
+ */
+ public static void setVector(VarBinaryVector vector, byte[]... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for VarCharVector.
+ */
+ public static void setVector(VarCharVector vector, byte[]... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for LargeVarCharVector.
+ */
+ public static void setVector(LargeVarCharVector vector, byte[]... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for VarCharVector.
+ */
+ public static void setVector(VarCharVector vector, String... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8));
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for LargeVarCharVector.
+ */
+ public static void setVector(LargeVarCharVector vector, String... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8));
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for {@link ListVector}.
+ */
+ public static void setVector(ListVector vector, List<Integer>... values) {
+ vector.allocateNewSafe();
+ Types.MinorType type = Types.MinorType.INT;
+ vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ dataVector.allocateNew();
+
+ // set underlying vectors
+ int curPos = 0;
+ vector.getOffsetBuffer().setInt(0, curPos);
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] == null) {
+ BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+ } else {
+ BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+ for (int value : values[i]) {
+ dataVector.setSafe(curPos, value);
+ curPos += 1;
+ }
+ }
+ vector.getOffsetBuffer().setInt((i + 1) * BaseRepeatedValueVector.OFFSET_WIDTH, curPos);
+ }
+ dataVector.setValueCount(curPos);
+ vector.setLastSet(values.length - 1);
+ vector.setValueCount(values.length);
+ }
+
+ /**
+ * Populate values for {@link LargeListVector}.
+ */
+ public static void setVector(LargeListVector vector, List<Integer>... values) {
+ vector.allocateNewSafe();
+ Types.MinorType type = Types.MinorType.INT;
+ vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ dataVector.allocateNew();
+
+ // set underlying vectors
+ int curPos = 0;
+ vector.getOffsetBuffer().setLong(0, curPos);
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] == null) {
+ BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+ } else {
+ BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+ for (int value : values[i]) {
+ dataVector.setSafe(curPos, value);
+ curPos += 1;
+ }
+ }
+ vector.getOffsetBuffer().setLong((long) (i + 1) * LargeListVector.OFFSET_WIDTH, curPos);
+ }
+ dataVector.setValueCount(curPos);
+ vector.setLastSet(values.length - 1);
+ vector.setValueCount(values.length);
+ }
+
+ /**
+ * Populate values for {@link FixedSizeListVector}.
+ */
+ public static void setVector(FixedSizeListVector vector, List<Integer>... values) {
+ vector.allocateNewSafe();
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] != null) {
+ assertEquals(vector.getListSize(), values[i].size());
+ }
+ }
+
+ Types.MinorType type = Types.MinorType.INT;
+ vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ dataVector.allocateNew();
+
+ // set underlying vectors
+ int curPos = 0;
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] == null) {
+ BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+ } else {
+ BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+ for (int value : values[i]) {
+ dataVector.setSafe(curPos, value);
+ curPos += 1;
+ }
+ }
+ }
+ dataVector.setValueCount(curPos);
+ vector.setValueCount(values.length);
+ }
+
+ /**
+ * Populate values for {@link StructVector}.
+ */
+ public static void setVector(StructVector vector, Map<String, List<Integer>> values) {
+ vector.allocateNewSafe();
+
+ int valueCount = 0;
+ for (final Entry<String, List<Integer>> entry : values.entrySet()) {
+ // Add the child
+ final IntVector child = vector.addOrGet(entry.getKey(),
+ FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+
+ // Write the values to the child
+ child.allocateNew();
+ final List<Integer> v = entry.getValue();
+ for (int i = 0; i < v.size(); i++) {
+ if (v.get(i) != null) {
+ child.set(i, v.get(i));
+ vector.setIndexDefined(i);
+ } else {
+ child.setNull(i);
+ }
+ }
+ valueCount = Math.max(valueCount, v.size());
+ }
+ vector.setValueCount(valueCount);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
new file mode 100644
index 000000000..8b2743210
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
@@ -0,0 +1,420 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.SeekableByteChannel;
+import java.nio.channels.WritableByteChannel;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.util.Collections;
+import java.util.UUID;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.apache.arrow.vector.util.VectorBatchAppender;
+import org.apache.arrow.vector.validate.ValidateVectorVisitor;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestExtensionType {
+ /**
+ * Test that a custom UUID type can be round-tripped through a temporary file.
+ */
+ @Test
+ public void roundtripUuid() throws IOException {
+ ExtensionTypeRegistry.register(new UuidType());
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new UuidType())));
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+ UuidVector vector = (UuidVector) root.getVector("a");
+ vector.setValueCount(2);
+ vector.set(0, u1);
+ vector.set(1, u2);
+ root.setRowCount(2);
+
+ final File file = File.createTempFile("uuidtest", ".arrow");
+ try (final WritableByteChannel channel = FileChannel
+ .open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE);
+ final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ }
+
+ try (final SeekableByteChannel channel = Files.newByteChannel(Paths.get(file.getAbsolutePath()));
+ final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ reader.loadNextBatch();
+ final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
+ Assert.assertEquals(root.getSchema(), readerRoot.getSchema());
+
+ final Field field = readerRoot.getSchema().getFields().get(0);
+ final UuidType expectedType = new UuidType();
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
+ expectedType.extensionName());
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
+ expectedType.serialize());
+
+ final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0);
+ Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+ if (!vector.isNull(i)) {
+ Assert.assertEquals(vector.getObject(i), deserialized.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Test that a custom UUID type can be read as its underlying type.
+ */
+ @Test
+ public void readUnderlyingType() throws IOException {
+ ExtensionTypeRegistry.register(new UuidType());
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new UuidType())));
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+ UuidVector vector = (UuidVector) root.getVector("a");
+ vector.setValueCount(2);
+ vector.set(0, u1);
+ vector.set(1, u2);
+ root.setRowCount(2);
+
+ final File file = File.createTempFile("uuidtest", ".arrow");
+ try (final WritableByteChannel channel = FileChannel
+ .open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE);
+ final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ }
+
+ ExtensionTypeRegistry.unregister(new UuidType());
+
+ try (final SeekableByteChannel channel = Files.newByteChannel(Paths.get(file.getAbsolutePath()));
+ final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ reader.loadNextBatch();
+ final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
+ Assert.assertEquals(1, readerRoot.getSchema().getFields().size());
+ Assert.assertEquals("a", readerRoot.getSchema().getFields().get(0).getName());
+ Assert.assertTrue(readerRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.FixedSizeBinary);
+ Assert.assertEquals(16,
+ ((ArrowType.FixedSizeBinary) readerRoot.getSchema().getFields().get(0).getType()).getByteWidth());
+
+ final Field field = readerRoot.getSchema().getFields().get(0);
+ final UuidType expectedType = new UuidType();
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
+ expectedType.extensionName());
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
+ expectedType.serialize());
+
+ final FixedSizeBinaryVector deserialized = (FixedSizeBinaryVector) readerRoot.getFieldVectors().get(0);
+ Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+ if (!vector.isNull(i)) {
+ final UUID uuid = vector.getObject(i);
+ final ByteBuffer bb = ByteBuffer.allocate(16);
+ bb.putLong(uuid.getMostSignificantBits());
+ bb.putLong(uuid.getLeastSignificantBits());
+ Assert.assertArrayEquals(bb.array(), deserialized.get(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testNullCheck() {
+ NullPointerException e = assertThrows(NullPointerException.class,
+ () -> {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final ExtensionTypeVector vector = new UuidVector("uuid", allocator, null)) {
+ vector.getField();
+ vector.allocateNewSafe();
+ }
+ });
+ assertTrue(e.getMessage().contains("underlyingVector can not be null."));
+ }
+
+ /**
+ * Test that a custom Location type can be round-tripped through a temporary file.
+ */
+ @Test
+ public void roundtripLocation() throws IOException {
+ ExtensionTypeRegistry.register(new LocationType());
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("location", new LocationType())));
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ LocationVector vector = (LocationVector) root.getVector("location");
+ vector.allocateNew();
+ vector.set(0, 34.073814f, -118.240784f);
+ vector.set(2, 37.768056f, -122.3875f);
+ vector.set(3, 40.739716f, -73.840782f);
+ vector.setValueCount(4);
+ root.setRowCount(4);
+
+ final File file = File.createTempFile("locationtest", ".arrow");
+ try (final WritableByteChannel channel = FileChannel
+ .open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE);
+ final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ }
+
+ try (final SeekableByteChannel channel = Files.newByteChannel(Paths.get(file.getAbsolutePath()));
+ final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ reader.loadNextBatch();
+ final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
+ Assert.assertEquals(root.getSchema(), readerRoot.getSchema());
+
+ final Field field = readerRoot.getSchema().getFields().get(0);
+ final LocationType expectedType = new LocationType();
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
+ expectedType.extensionName());
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
+ expectedType.serialize());
+
+ final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0);
+ Assert.assertTrue(deserialized instanceof LocationVector);
+ Assert.assertEquals(deserialized.getName(), "location");
+ StructVector deserStruct = (StructVector) deserialized.getUnderlyingVector();
+ Assert.assertNotNull(deserStruct.getChild("Latitude"));
+ Assert.assertNotNull(deserStruct.getChild("Longitude"));
+ Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+ if (!vector.isNull(i)) {
+ Assert.assertEquals(vector.getObject(i), deserialized.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testVectorCompare() {
+ UuidType uuidType = new UuidType();
+ ExtensionTypeRegistry.register(uuidType);
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ UuidVector a1 = (UuidVector) uuidType.getNewVector("a", FieldType.nullable(uuidType), allocator);
+ UuidVector a2 = (UuidVector) uuidType.getNewVector("a", FieldType.nullable(uuidType), allocator);
+ UuidVector bb = (UuidVector) uuidType.getNewVector("a", FieldType.nullable(uuidType), allocator)
+ ) {
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+
+ // Test out type and vector validation visitors for an ExtensionTypeVector
+ ValidateVectorVisitor validateVisitor = new ValidateVectorVisitor();
+ validateVisitor.visit(a1, null);
+
+ a1.setValueCount(2);
+ a1.set(0, u1);
+ a1.set(1, u2);
+
+ a2.setValueCount(2);
+ a2.set(0, u1);
+ a2.set(1, u2);
+
+ bb.setValueCount(2);
+ bb.set(0, u2);
+ bb.set(1, u1);
+
+ Range range = new Range(0, 0, a1.getValueCount());
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(a1, a2);
+ assertTrue(visitor.rangeEquals(range));
+
+ visitor = new RangeEqualsVisitor(a1, bb);
+ assertFalse(visitor.rangeEquals(range));
+
+ // Test out vector appender
+ VectorBatchAppender.batchAppend(a1, a2, bb);
+ assertEquals(a1.getValueCount(), 6);
+ validateVisitor.visit(a1, null);
+ }
+ }
+
+ static class UuidType extends ExtensionType {
+
+ @Override
+ public ArrowType storageType() {
+ return new ArrowType.FixedSizeBinary(16);
+ }
+
+ @Override
+ public String extensionName() {
+ return "uuid";
+ }
+
+ @Override
+ public boolean extensionEquals(ExtensionType other) {
+ return other instanceof UuidType;
+ }
+
+ @Override
+ public ArrowType deserialize(ArrowType storageType, String serializedData) {
+ if (!storageType.equals(storageType())) {
+ throw new UnsupportedOperationException("Cannot construct UuidType from underlying type " + storageType);
+ }
+ return new UuidType();
+ }
+
+ @Override
+ public String serialize() {
+ return "";
+ }
+
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16));
+ }
+ }
+
+ static class UuidVector extends ExtensionTypeVector<FixedSizeBinaryVector> {
+
+ public UuidVector(String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) {
+ super(name, allocator, underlyingVector);
+ }
+
+ @Override
+ public UUID getObject(int index) {
+ final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index));
+ return new UUID(bb.getLong(), bb.getLong());
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return getUnderlyingVector().hashCode(index, hasher);
+ }
+
+ public void set(int index, UUID uuid) {
+ ByteBuffer bb = ByteBuffer.allocate(16);
+ bb.putLong(uuid.getMostSignificantBits());
+ bb.putLong(uuid.getLeastSignificantBits());
+ getUnderlyingVector().set(index, bb.array());
+ }
+ }
+
+ static class LocationType extends ExtensionType {
+
+ @Override
+ public ArrowType storageType() {
+ return Struct.INSTANCE;
+ }
+
+ @Override
+ public String extensionName() {
+ return "location";
+ }
+
+ @Override
+ public boolean extensionEquals(ExtensionType other) {
+ return other instanceof LocationType;
+ }
+
+ @Override
+ public ArrowType deserialize(ArrowType storageType, String serializedData) {
+ if (!storageType.equals(storageType())) {
+ throw new UnsupportedOperationException("Cannot construct LocationType from underlying type " + storageType);
+ }
+ return new LocationType();
+ }
+
+ @Override
+ public String serialize() {
+ return "";
+ }
+
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ return new LocationVector(name, allocator);
+ }
+ }
+
+ static class LocationVector extends ExtensionTypeVector<StructVector> {
+
+ private static StructVector buildUnderlyingVector(String name, BufferAllocator allocator) {
+ final StructVector underlyingVector =
+ new StructVector(name, allocator, FieldType.nullable(ArrowType.Struct.INSTANCE), null);
+ underlyingVector.addOrGet("Latitude",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ underlyingVector.addOrGet("Longitude",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ return underlyingVector;
+ }
+
+ public LocationVector(String name, BufferAllocator allocator) {
+ super(name, allocator, buildUnderlyingVector(name, allocator));
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return getUnderlyingVector().hashCode(index, hasher);
+ }
+
+ @Override
+ public java.util.Map<String, ?> getObject(int index) {
+ return getUnderlyingVector().getObject(index);
+ }
+
+ public void set(int index, float latitude, float longitude) {
+ getUnderlyingVector().getChild("Latitude", Float4Vector.class).set(index, latitude);
+ getUnderlyingVector().getChild("Longitude", Float4Vector.class).set(index, longitude);
+ getUnderlyingVector().setIndexDefined(index);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java
new file mode 100644
index 000000000..bc984fa64
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY;
+import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.junit.Test;
+
+public class TestField {
+
+ private static Field field(String name, boolean nullable, ArrowType type, Map<String, String> metadata) {
+ return new Field(name, new FieldType(nullable, type, null, metadata), Collections.emptyList());
+ }
+
+ @Test
+ public void testMetadata() throws IOException {
+ Map<String, String> metadata = new HashMap<>(1);
+ metadata.put("testKey", "testValue");
+
+ Schema schema = new Schema(Collections.singletonList(
+ field("a", false, new Int(8, true), metadata)
+ ));
+
+ String json = schema.toJson();
+ Schema actual = Schema.fromJSON(json);
+
+ jsonContains(json, "\"" + METADATA_KEY + "\" : \"testKey\"", "\"" + METADATA_VALUE + "\" : \"testValue\"");
+
+ Map<String, String> actualMetadata = actual.getFields().get(0).getMetadata();
+ assertEquals(1, actualMetadata.size());
+ assertEquals("testValue", actualMetadata.get("testKey"));
+ }
+
+ private void jsonContains(String json, String... strings) {
+ for (String string : strings) {
+ assertTrue(json + " contains " + string, json.contains(string));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
new file mode 100644
index 000000000..0e5375865
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import static java.util.Arrays.asList;
+import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY;
+import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
+import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
+import org.apache.arrow.vector.types.pojo.ArrowType.Date;
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+import org.apache.arrow.vector.types.pojo.ArrowType.Duration;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
+import org.apache.arrow.vector.types.pojo.ArrowType.List;
+import org.apache.arrow.vector.types.pojo.ArrowType.Null;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Time;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.junit.Test;
+
+public class TestSchema {
+
+ private static Field field(String name, boolean nullable, ArrowType type, Field... children) {
+ return new Field(name, new FieldType(nullable, type, null, null), asList(children));
+ }
+
+ private static Field field(String name, ArrowType type, Field... children) {
+ return field(name, true, type, children);
+ }
+
+ @Test
+ public void testComplex() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", false, new Int(8, true)),
+ field("b", new Struct(),
+ field("c", new Int(16, true)),
+ field("d", new Utf8())),
+ field("e", new List(), field(null, new Date(DateUnit.MILLISECOND))),
+ field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("g", new Timestamp(TimeUnit.MILLISECOND, "UTC")),
+ field("h", new Timestamp(TimeUnit.MICROSECOND, null)),
+ field("i", new Interval(IntervalUnit.DAY_TIME)),
+ field("j", new ArrowType.Duration(TimeUnit.SECOND))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Int(8, true) not null, b: Struct<c: Int(16, true), d: Utf8>, e: List<Date(MILLISECOND)>, " +
+ "f: FloatingPoint(SINGLE), g: Timestamp(MILLISECOND, UTC), h: Timestamp(MICROSECOND, null), " +
+ "i: Interval(DAY_TIME), j: Duration(SECOND)>",
+ schema.toString());
+ }
+
+ @Test
+ public void testAll() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", false, new Null()),
+ field("b", new Struct(), field("ba", new Null())),
+ field("c", new List(), field("ca", new Null())),
+ field("d", new Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new Null())),
+ field("e", new Int(8, true)),
+ field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("g", new Utf8()),
+ field("h", new Binary()),
+ field("i", new Bool()),
+ field("j", new Decimal(5, 5, 128)),
+ field("k", new Date(DateUnit.DAY)),
+ field("l", new Date(DateUnit.MILLISECOND)),
+ field("m", new Time(TimeUnit.SECOND, 32)),
+ field("n", new Time(TimeUnit.MILLISECOND, 32)),
+ field("o", new Time(TimeUnit.MICROSECOND, 64)),
+ field("p", new Time(TimeUnit.NANOSECOND, 64)),
+ field("q", new Timestamp(TimeUnit.MILLISECOND, "UTC")),
+ field("r", new Timestamp(TimeUnit.MICROSECOND, null)),
+ field("s", new Interval(IntervalUnit.DAY_TIME)),
+ field("t", new FixedSizeBinary(100)),
+ field("u", new Duration(TimeUnit.SECOND)),
+ field("v", new Duration(TimeUnit.MICROSECOND))
+ ));
+ roundTrip(schema);
+ }
+
+ @Test
+ public void testUnion() throws IOException {
+ Schema schema = new Schema(asList(
+ field("d", new Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new Null()))
+ ));
+ roundTrip(schema);
+ contains(schema, "Sparse");
+ }
+
+ @Test
+ public void testDate() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Date(DateUnit.DAY)),
+ field("b", new Date(DateUnit.MILLISECOND))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Date(DAY), b: Date(MILLISECOND)>",
+ schema.toString());
+ }
+
+ @Test
+ public void testTime() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Time(TimeUnit.SECOND, 32)),
+ field("b", new Time(TimeUnit.MILLISECOND, 32)),
+ field("c", new Time(TimeUnit.MICROSECOND, 64)),
+ field("d", new Time(TimeUnit.NANOSECOND, 64))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Time(SECOND, 32), b: Time(MILLISECOND, 32), c: Time(MICROSECOND, 64), d: Time(NANOSECOND, 64)>",
+ schema.toString());
+ }
+
+ @Test
+ public void testTS() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Timestamp(TimeUnit.SECOND, "UTC")),
+ field("b", new Timestamp(TimeUnit.MILLISECOND, "UTC")),
+ field("c", new Timestamp(TimeUnit.MICROSECOND, "UTC")),
+ field("d", new Timestamp(TimeUnit.NANOSECOND, "UTC")),
+ field("e", new Timestamp(TimeUnit.SECOND, null)),
+ field("f", new Timestamp(TimeUnit.MILLISECOND, null)),
+ field("g", new Timestamp(TimeUnit.MICROSECOND, null)),
+ field("h", new Timestamp(TimeUnit.NANOSECOND, null))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Timestamp(SECOND, UTC), b: Timestamp(MILLISECOND, UTC), c: Timestamp(MICROSECOND, UTC), " +
+ "d: Timestamp(NANOSECOND, UTC), e: Timestamp(SECOND, null), f: Timestamp(MILLISECOND, null), " +
+ "g: Timestamp(MICROSECOND, null), h: Timestamp(NANOSECOND, null)>",
+ schema.toString());
+ }
+
+ @Test
+ public void testInterval() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Interval(IntervalUnit.YEAR_MONTH)),
+ field("b", new Interval(IntervalUnit.DAY_TIME))
+ ));
+ roundTrip(schema);
+ contains(schema, "YEAR_MONTH", "DAY_TIME");
+ }
+
+ @Test
+ public void testRoundTripDurationInterval() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Duration(TimeUnit.SECOND)),
+ field("b", new Duration(TimeUnit.MILLISECOND)),
+ field("c", new Duration(TimeUnit.MICROSECOND)),
+ field("d", new Duration(TimeUnit.NANOSECOND))
+ ));
+ roundTrip(schema);
+ contains(schema, "SECOND", "MILLI", "MICRO", "NANO");
+ }
+
+ @Test
+ public void testFP() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new FloatingPoint(FloatingPointPrecision.HALF)),
+ field("b", new FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("c", new FloatingPoint(FloatingPointPrecision.DOUBLE))
+ ));
+ roundTrip(schema);
+ contains(schema, "HALF", "SINGLE", "DOUBLE");
+ }
+
+ @Test
+ public void testMetadata() throws IOException {
+ Map<String, String> metadata = new HashMap<>(1);
+ metadata.put("testKey", "testValue");
+
+ java.util.List<Field> fields = asList(
+ field("a", false, new Int(8, true)),
+ field("b", new Struct(),
+ field("c", new Int(16, true)),
+ field("d", new Utf8())),
+ field("e", new List(), field(null, new Date(DateUnit.MILLISECOND)))
+ );
+ Schema schema = new Schema(fields, metadata);
+ roundTrip(schema);
+ contains(schema, "\"" + METADATA_KEY + "\" : \"testKey\"", "\"" + METADATA_VALUE + "\" : \"testValue\"");
+ }
+
+ private void roundTrip(Schema schema) throws IOException {
+ String json = schema.toJson();
+ Schema actual = Schema.fromJSON(json);
+ assertEquals(schema.toJson(), actual.toJson());
+ assertEquals(schema, actual);
+ validateFieldsHashcode(schema.getFields(), actual.getFields());
+ assertEquals(schema.hashCode(), actual.hashCode());
+ }
+
+ private void validateFieldsHashcode(java.util.List<Field> schemaFields, java.util.List<Field> actualFields) {
+ assertEquals(schemaFields.size(), actualFields.size());
+ if (schemaFields.size() == 0) {
+ return;
+ }
+ for (int i = 0; i < schemaFields.size(); i++) {
+ Field schemaField = schemaFields.get(i);
+ Field actualField = actualFields.get(i);
+ validateFieldsHashcode(schemaField.getChildren(), actualField.getChildren());
+ validateHashCode(schemaField.getType(), actualField.getType());
+ validateHashCode(schemaField, actualField);
+ }
+ }
+
+ private void validateHashCode(Object o1, Object o2) {
+ assertEquals(o1, o2);
+ assertEquals(o1 + " == " + o2, o1.hashCode(), o2.hashCode());
+ }
+
+ private void contains(Schema schema, String... s) {
+ String json = schema.toJson();
+ for (String string : s) {
+ assertTrue(json + " contains " + string, json.contains(string));
+ }
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java
new file mode 100644
index 000000000..804092ed9
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class DecimalUtilityTest {
+ private static final BigInteger[] MAX_BIG_INT = new BigInteger[]{BigInteger.valueOf(10).pow(38)
+ .subtract(java.math.BigInteger.ONE), java.math.BigInteger.valueOf(10).pow(76)};
+ private static final BigInteger[] MIN_BIG_INT = new BigInteger[]{MAX_BIG_INT[0].multiply(BigInteger.valueOf(-1)),
+ MAX_BIG_INT[1].multiply(BigInteger.valueOf(-1))};
+
+ @Test
+ public void testSetLongInDecimalArrowBuf() {
+ int[] byteLengths = new int[]{16, 32};
+ for (int x = 0; x < 2; x++) {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(byteLengths[x]);
+ ) {
+ int [] intValues = new int [] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0};
+ for (int val : intValues) {
+ buf.clear();
+ DecimalUtility.writeLongToArrowBuf((long) val, buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSetByteArrayInDecimalArrowBuf() {
+ int[] byteLengths = new int[]{16, 32};
+ for (int x = 0; x < 2; x++) {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(byteLengths[x]);
+ ) {
+ int [] intValues = new int [] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0};
+ for (int val : intValues) {
+ buf.clear();
+ DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+
+ long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE};
+ for (long val : longValues) {
+ buf.clear();
+ DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+
+ BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]};
+ for (BigInteger val : decimals) {
+ buf.clear();
+ DecimalUtility.writeByteArrayToArrowBuf(val.toByteArray(), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = new BigDecimal(val);
+ Assert.assertEquals(expected, actual);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSetBigDecimalInDecimalArrowBuf() {
+ int[] byteLengths = new int[]{16, 32};
+ for (int x = 0; x < 2; x++) {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(byteLengths[x]);
+ ) {
+ int [] intValues = new int [] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0};
+ for (int val : intValues) {
+ buf.clear();
+ DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+
+ long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE};
+ for (long val : longValues) {
+ buf.clear();
+ DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+
+ BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]};
+ for (BigInteger val : decimals) {
+ buf.clear();
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(val), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = new BigDecimal(val);
+ Assert.assertEquals(expected, actual);
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java
new file mode 100644
index 000000000..4138ea9d7
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+/**
+ * Test cases for {@link DataSizeRoundingUtil}.
+ */
+public class TestDataSizeRoundingUtil {
+
+ @Test
+ public void testRoundUpTo8MultipleInt() {
+ assertEquals(0, DataSizeRoundingUtil.roundUpTo8Multiple(0));
+ assertEquals(16, DataSizeRoundingUtil.roundUpTo8Multiple(9));
+ assertEquals(24, DataSizeRoundingUtil.roundUpTo8Multiple(20));
+ assertEquals(128, DataSizeRoundingUtil.roundUpTo8Multiple(128));
+ }
+
+ @Test
+ public void testRoundUpTo8MultipleLong() {
+ assertEquals(0L, DataSizeRoundingUtil.roundUpTo8Multiple(0L));
+ assertEquals(40L, DataSizeRoundingUtil.roundUpTo8Multiple(37L));
+ assertEquals(32L, DataSizeRoundingUtil.roundUpTo8Multiple(29L));
+ assertEquals(512L, DataSizeRoundingUtil.roundUpTo8Multiple(512L));
+ }
+
+ @Test
+ public void testRoundDownTo8MultipleInt() {
+ assertEquals(0, DataSizeRoundingUtil.roundDownTo8Multiple(0));
+ assertEquals(16, DataSizeRoundingUtil.roundDownTo8Multiple(23));
+ assertEquals(24, DataSizeRoundingUtil.roundDownTo8Multiple(27));
+ assertEquals(128, DataSizeRoundingUtil.roundDownTo8Multiple(128));
+ }
+
+ @Test
+ public void testRoundDownTo8MultipleLong() {
+ assertEquals(0L, DataSizeRoundingUtil.roundDownTo8Multiple(0L));
+ assertEquals(40L, DataSizeRoundingUtil.roundDownTo8Multiple(45L));
+ assertEquals(32L, DataSizeRoundingUtil.roundDownTo8Multiple(39L));
+ assertEquals(512L, DataSizeRoundingUtil.roundDownTo8Multiple(512L));
+ }
+
+ @Test
+ public void testDivideBy8CeilInt() {
+ assertEquals(0, DataSizeRoundingUtil.divideBy8Ceil(0));
+ assertEquals(3, DataSizeRoundingUtil.divideBy8Ceil(23));
+ assertEquals(5, DataSizeRoundingUtil.divideBy8Ceil(35));
+ assertEquals(24, DataSizeRoundingUtil.divideBy8Ceil(192));
+ }
+
+ @Test
+ public void testDivideBy8CeilLong() {
+ assertEquals(0L, DataSizeRoundingUtil.divideBy8Ceil(0L));
+ assertEquals(5L, DataSizeRoundingUtil.divideBy8Ceil(37L));
+ assertEquals(10L, DataSizeRoundingUtil.divideBy8Ceil(73L));
+ assertEquals(25L, DataSizeRoundingUtil.divideBy8Ceil(200L));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java
new file mode 100644
index 000000000..419872225
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static junit.framework.TestCase.assertNull;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link ElementAddressableVectorIterator}.
+ */
+public class TestElementAddressableVectorIterator {
+
+ private final int VECTOR_LENGTH = 100;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testIterateIntVector() {
+ try (IntVector intVector = new IntVector("", allocator)) {
+ intVector.allocateNew(VECTOR_LENGTH);
+ intVector.setValueCount(VECTOR_LENGTH);
+
+ // prepare data in sorted order
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i == 0) {
+ intVector.setNull(i);
+ } else {
+ intVector.set(i, i);
+ }
+ }
+
+ // iterate
+ ElementAddressableVectorIterator<IntVector> it = new ElementAddressableVectorIterator<>(intVector);
+ int index = 0;
+ while (it.hasNext()) {
+ ArrowBufPointer pt;
+
+ if (index % 2 == 0) {
+ // use populated pointer.
+ pt = new ArrowBufPointer();
+ it.next(pt);
+ } else {
+ // use iterator inner pointer
+ pt = it.next();
+ }
+ if (index == 0) {
+ assertNull(pt.getBuf());
+ } else {
+ assertEquals(index, pt.getBuf().getInt(pt.getOffset()));
+ }
+ index += 1;
+ }
+ }
+ }
+
+ @Test
+ public void testIterateVarCharVector() {
+ try (VarCharVector strVector = new VarCharVector("", allocator)) {
+ strVector.allocateNew(VECTOR_LENGTH * 10, VECTOR_LENGTH);
+ strVector.setValueCount(VECTOR_LENGTH);
+
+ // prepare data in sorted order
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i == 0) {
+ strVector.setNull(i);
+ } else {
+ strVector.set(i, String.valueOf(i).getBytes());
+ }
+ }
+
+ // iterate
+ ElementAddressableVectorIterator<VarCharVector> it = new ElementAddressableVectorIterator<>(strVector);
+ int index = 0;
+ while (it.hasNext()) {
+ ArrowBufPointer pt;
+
+ if (index % 2 == 0) {
+ // use populated pointer.
+ pt = new ArrowBufPointer();
+ it.next(pt);
+ } else {
+ // use iterator inner pointer
+ pt = it.next();
+ }
+
+ if (index == 0) {
+ assertNull(pt.getBuf());
+ } else {
+ String expected = String.valueOf(index);
+ byte[] actual = new byte[expected.length()];
+ assertEquals(expected.length(), pt.getLength());
+
+ pt.getBuf().getBytes(pt.getOffset(), actual);
+ assertEquals(expected, new String(actual));
+ }
+ index += 1;
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java
new file mode 100644
index 000000000..ea829060d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestMultiMapWithOrdinal {
+
+ @Test
+ public void test() {
+ MultiMapWithOrdinal<String, String> map = new MultiMapWithOrdinal<>();
+
+ map.put("x", "1", false);
+ Assert.assertEquals(1, map.size());
+ map.remove("x", "1");
+ Assert.assertTrue(map.isEmpty());
+ map.put("x", "1", false);
+ map.put("x", "2", false);
+ map.put("y", "0", false);
+ Assert.assertEquals(3, map.size());
+ Assert.assertEquals(2, map.getAll("x").size());
+ Assert.assertEquals("1", map.getAll("x").stream().findFirst().get());
+ Assert.assertEquals("1", map.getByOrdinal(0));
+ Assert.assertEquals("2", map.getByOrdinal(1));
+ Assert.assertEquals("0", map.getByOrdinal(2));
+ Assert.assertTrue(map.remove("x", "1"));
+ Assert.assertFalse(map.remove("x", "1"));
+ Assert.assertEquals("0", map.getByOrdinal(0));
+ Assert.assertEquals(2, map.size());
+ map.put("x", "3", true);
+ Assert.assertEquals(1, map.getAll("x").size());
+ Assert.assertEquals("3", map.getAll("x").stream().findFirst().get());
+ map.put("z", "4", false);
+ Assert.assertEquals(3, map.size());
+ map.put("z", "5", false);
+ map.put("z", "6", false);
+ Assert.assertEquals(5, map.size());
+ map.removeAll("z");
+ Assert.assertEquals(2, map.size());
+ Assert.assertFalse(map.containsKey("z"));
+
+
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java
new file mode 100644
index 000000000..2db70ca5d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.apache.arrow.vector.util.Validator.equalEnough;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+public class TestValidator {
+
+ @Test
+ public void testFloatComp() {
+ assertTrue(equalEnough(912.4140000000002F, 912.414F));
+ assertTrue(equalEnough(912.4140000000002D, 912.414D));
+ assertTrue(equalEnough(912.414F, 912.4140000000002F));
+ assertTrue(equalEnough(912.414D, 912.4140000000002D));
+ assertFalse(equalEnough(912.414D, 912.4140001D));
+ assertFalse(equalEnough(null, 912.414D));
+ assertTrue(equalEnough((Float) null, null));
+ assertTrue(equalEnough((Double) null, null));
+ assertFalse(equalEnough(912.414D, null));
+ assertFalse(equalEnough(Double.MAX_VALUE, Double.MIN_VALUE));
+ assertFalse(equalEnough(Double.MIN_VALUE, Double.MAX_VALUE));
+ assertTrue(equalEnough(Double.MAX_VALUE, Double.MAX_VALUE));
+ assertTrue(equalEnough(Double.MIN_VALUE, Double.MIN_VALUE));
+ assertTrue(equalEnough(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY));
+ assertFalse(equalEnough(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
+ assertTrue(equalEnough(Double.NaN, Double.NaN));
+ assertFalse(equalEnough(1.0, Double.NaN));
+ assertFalse(equalEnough(Float.MAX_VALUE, Float.MIN_VALUE));
+ assertFalse(equalEnough(Float.MIN_VALUE, Float.MAX_VALUE));
+ assertTrue(equalEnough(Float.MAX_VALUE, Float.MAX_VALUE));
+ assertTrue(equalEnough(Float.MIN_VALUE, Float.MIN_VALUE));
+ assertTrue(equalEnough(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY));
+ assertFalse(equalEnough(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
+ assertTrue(equalEnough(Float.NaN, Float.NaN));
+ assertFalse(equalEnough(1.0F, Float.NaN));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
new file mode 100644
index 000000000..1cd263120
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
@@ -0,0 +1,794 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.holders.NullableBigIntHolder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link VectorAppender}.
+ */
+public class TestVectorAppender {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testAppendFixedWidthVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+ try (IntVector target = new IntVector("", allocator);
+ IntVector delta = new IntVector("", allocator)) {
+
+ target.allocateNew(length1);
+ delta.allocateNew(length2);
+
+ ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, null, 8, 9);
+ ValueVectorDataPopulator.setVector(delta, null, 11, 12, 13, 14);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(length1 + length2, target.getValueCount());
+
+ try (IntVector expected = new IntVector("expected", allocator)) {
+ expected.allocateNew();
+ ValueVectorDataPopulator.setVector(expected, 0, 1, 2, 3, 4, 5, 6, null, 8, 9, null, 11, 12, 13, 14);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyFixedWidthVector() {
+ try (IntVector target = new IntVector("", allocator);
+ IntVector delta = new IntVector("", allocator)) {
+
+ ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, null, 8, 9);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(10, target.getValueCount());
+
+ try (IntVector expected = new IntVector("expected", allocator)) {
+ ValueVectorDataPopulator.setVector(expected, 0, 1, 2, 3, 4, 5, 6, null, 8, 9);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendVariableWidthVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+ try (VarCharVector target = new VarCharVector("", allocator);
+ VarCharVector delta = new VarCharVector("", allocator)) {
+
+ target.allocateNew(5, length1);
+ delta.allocateNew(5, length2);
+
+ ValueVectorDataPopulator.setVector(target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+ ValueVectorDataPopulator.setVector(delta, "a10", "a11", "a12", "a13", null);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (VarCharVector expected = new VarCharVector("expected", allocator)) {
+ expected.allocateNew();
+ ValueVectorDataPopulator.setVector(expected,
+ "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9", "a10", "a11", "a12", "a13", null);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyVariableWidthVector() {
+ try (VarCharVector target = new VarCharVector("", allocator);
+ VarCharVector delta = new VarCharVector("", allocator)) {
+
+ ValueVectorDataPopulator.setVector(target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (VarCharVector expected = new VarCharVector("expected", allocator)) {
+ ValueVectorDataPopulator.setVector(expected,
+ "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendLargeVariableWidthVector() {
+ final int length1 = 5;
+ final int length2 = 10;
+ try (LargeVarCharVector target = new LargeVarCharVector("", allocator);
+ LargeVarCharVector delta = new LargeVarCharVector("", allocator)) {
+
+ target.allocateNew(5, length1);
+ delta.allocateNew(5, length2);
+
+ ValueVectorDataPopulator.setVector(target, "a0", null, "a2", "a3", null);
+ ValueVectorDataPopulator.setVector(delta, "a5", "a6", "a7", null, null, "a10", "a11", "a12", "a13", null);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (LargeVarCharVector expected = new LargeVarCharVector("expected", allocator)) {
+ expected.allocateNew();
+ ValueVectorDataPopulator.setVector(expected,
+ "a0", null, "a2", "a3", null, "a5", "a6", "a7", null, null, "a10", "a11", "a12", "a13", null);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyLargeVariableWidthVector() {
+ try (LargeVarCharVector target = new LargeVarCharVector("", allocator);
+ LargeVarCharVector delta = new LargeVarCharVector("", allocator)) {
+
+ ValueVectorDataPopulator.setVector(target, "a0", null, "a2", "a3", null);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (LargeVarCharVector expected = new LargeVarCharVector("expected", allocator)) {
+ ValueVectorDataPopulator.setVector(expected, "a0", null, "a2", "a3", null);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendListVector() {
+ final int length1 = 5;
+ final int length2 = 2;
+ try (ListVector target = ListVector.empty("target", allocator);
+ ListVector delta = ListVector.empty("delta", allocator)) {
+
+ target.allocateNew();
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1),
+ Arrays.asList(2, 3),
+ null,
+ Arrays.asList(6, 7),
+ Arrays.asList(8, 9));
+ assertEquals(length1, target.getValueCount());
+
+ delta.allocateNew();
+ ValueVectorDataPopulator.setVector(delta,
+ Arrays.asList(10, 11, 12, 13, 14),
+ Arrays.asList(15, 16, 17, 18, 19));
+ assertEquals(length2, delta.getValueCount());
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(7, target.getValueCount());
+
+ List<Integer> expected = Arrays.asList(0, 1);
+ assertEquals(expected, target.getObject(0));
+
+ expected = Arrays.asList(2, 3);
+ assertEquals(expected, target.getObject(1));
+
+ assertTrue(target.isNull(2));
+
+ expected = Arrays.asList(6, 7);
+ assertEquals(expected, target.getObject(3));
+
+ expected = Arrays.asList(8, 9);
+ assertEquals(expected, target.getObject(4));
+
+ expected = Arrays.asList(10, 11, 12, 13, 14);
+ assertEquals(expected, target.getObject(5));
+
+ expected = Arrays.asList(15, 16, 17, 18, 19);
+ assertEquals(expected, target.getObject(6));
+ }
+ }
+
+ @Test
+ public void testAppendEmptyListVector() {
+ try (ListVector target = ListVector.empty("target", allocator);
+ ListVector delta = ListVector.empty("delta", allocator)) {
+ // populate target with data
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1),
+ Arrays.asList(2, 3),
+ null,
+ Arrays.asList(6, 7));
+ assertEquals(4, target.getValueCount());
+
+ // leave delta vector empty and unallocated
+ delta.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ // verify delta vector has original data
+ assertEquals(4, target.getValueCount());
+
+ List<Integer> expected = Arrays.asList(0, 1);
+ assertEquals(expected, target.getObject(0));
+
+ expected = Arrays.asList(2, 3);
+ assertEquals(expected, target.getObject(1));
+
+ assertTrue(target.isNull(2));
+
+ expected = Arrays.asList(6, 7);
+ assertEquals(expected, target.getObject(3));
+ }
+ }
+
+ @Test
+ public void testAppendFixedSizeListVector() {
+ try (FixedSizeListVector target = FixedSizeListVector.empty("target", 5, allocator);
+ FixedSizeListVector delta = FixedSizeListVector.empty("delta", 5, allocator)) {
+
+ target.allocateNew();
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1, 2, 3, 4),
+ null);
+ assertEquals(2, target.getValueCount());
+
+ delta.allocateNew();
+ ValueVectorDataPopulator.setVector(delta,
+ Arrays.asList(10, 11, 12, 13, 14),
+ Arrays.asList(15, 16, 17, 18, 19));
+ assertEquals(2, delta.getValueCount());
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(4, target.getValueCount());
+
+ assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0));
+ assertTrue(target.isNull(1));
+ assertEquals(Arrays.asList(10, 11, 12, 13, 14), target.getObject(2));
+ assertEquals(Arrays.asList(15, 16, 17, 18, 19), target.getObject(3));
+ }
+ }
+
+ @Test
+ public void testAppendEmptyFixedSizeListVector() {
+ try (FixedSizeListVector target = FixedSizeListVector.empty("target", 5, allocator);
+ FixedSizeListVector delta = FixedSizeListVector.empty("delta", 5, allocator)) {
+
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1, 2, 3, 4),
+ null);
+ assertEquals(2, target.getValueCount());
+
+ // leave delta vector empty and unallocated
+ delta.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(2, target.getValueCount());
+
+ assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0));
+ assertTrue(target.isNull(1));
+ }
+ }
+
+ @Test
+ public void testAppendEmptyLargeListVector() {
+ try (LargeListVector target = LargeListVector.empty("target", allocator);
+ LargeListVector delta = LargeListVector.empty("delta", allocator)) {
+
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1, 2, 3, 4),
+ null);
+ assertEquals(2, target.getValueCount());
+
+ // leave delta vector empty and unallocated
+ delta.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(2, target.getValueCount());
+
+ assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0));
+ assertTrue(target.isNull(1));
+ }
+ }
+
+ @Test
+ public void testAppendStructVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+ try (final StructVector target = StructVector.empty("target", allocator);
+ final StructVector delta = StructVector.empty("delta", allocator)) {
+
+ IntVector targetChild1 = target.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ VarCharVector targetChild2 = target.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ targetChild1.allocateNew();
+ targetChild2.allocateNew();
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9);
+ ValueVectorDataPopulator.setVector(targetChild2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9");
+ target.setValueCount(length1);
+
+ IntVector deltaChild1 = delta.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ VarCharVector deltaChild2 = delta.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ deltaChild1.allocateNew();
+ deltaChild2.allocateNew();
+ ValueVectorDataPopulator.setVector(deltaChild1, 10, 11, 12, null, 14);
+ ValueVectorDataPopulator.setVector(deltaChild2, "a10", "a11", "a12", "a13", "a14");
+ delta.setValueCount(length2);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(length1 + length2, target.getValueCount());
+ IntVector child1 = (IntVector) target.getVectorById(0);
+ VarCharVector child2 = (VarCharVector) target.getVectorById(1);
+
+ try (IntVector expected1 = new IntVector("expected1", allocator);
+ VarCharVector expected2 = new VarCharVector("expected2", allocator)) {
+ expected1.allocateNew();
+ expected2.allocateNew();
+
+ ValueVectorDataPopulator.setVector(expected1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9, 10, 11, 12, null, 14);
+ ValueVectorDataPopulator.setVector(expected2,
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9", "a10", "a11", "a12", "a13", "a14");
+
+ assertVectorsEqual(expected1, target.getChild("f0"));
+ assertVectorsEqual(expected2, target.getChild("f1"));
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyStructVector() {
+ try (final StructVector target = StructVector.empty("target", allocator);
+ final StructVector delta = StructVector.empty("delta", allocator)) {
+
+ IntVector targetChild1 = target.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ VarCharVector targetChild2 = target.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9);
+ ValueVectorDataPopulator.setVector(targetChild2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9");
+ target.setValueCount(10);
+
+ // leave delta vector fields empty and unallocated
+ delta.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ delta.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(10, target.getValueCount());
+
+ try (IntVector expected1 = new IntVector("expected1", allocator);
+ VarCharVector expected2 = new VarCharVector("expected2", allocator)) {
+ ValueVectorDataPopulator.setVector(expected1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9);
+ ValueVectorDataPopulator.setVector(expected2,
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9");
+
+ assertVectorsEqual(expected1, target.getChild("f0"));
+ assertVectorsEqual(expected2, target.getChild("f1"));
+ }
+ }
+ }
+
+ @Test
+ public void testAppendUnionVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+
+ try (final UnionVector target = UnionVector.empty("target", allocator);
+ final UnionVector delta = UnionVector.empty("delta", allocator)) {
+
+ // alternating ints and big ints
+ target.setType(0, Types.MinorType.INT);
+ target.setType(1, Types.MinorType.BIGINT);
+ target.setType(2, Types.MinorType.INT);
+ target.setType(3, Types.MinorType.BIGINT);
+ target.setType(4, Types.MinorType.INT);
+ target.setType(5, Types.MinorType.BIGINT);
+ target.setType(6, Types.MinorType.INT);
+ target.setType(7, Types.MinorType.BIGINT);
+ target.setType(8, Types.MinorType.INT);
+ target.setType(9, Types.MinorType.BIGINT);
+ target.setType(10, Types.MinorType.INT);
+ target.setType(11, Types.MinorType.BIGINT);
+ target.setType(12, Types.MinorType.INT);
+ target.setType(13, Types.MinorType.BIGINT);
+ target.setType(14, Types.MinorType.INT);
+ target.setType(15, Types.MinorType.BIGINT);
+ target.setType(16, Types.MinorType.INT);
+ target.setType(17, Types.MinorType.BIGINT);
+ target.setType(18, Types.MinorType.INT);
+ target.setType(19, Types.MinorType.BIGINT);
+
+ IntVector targetIntVec = target.getIntVector();
+ targetIntVec.allocateNew();
+ ValueVectorDataPopulator.setVector(
+ targetIntVec,
+ 0, null, 1, null, 2, null, 3, null, 4, null, 5, null, 6, null, 7, null, 8, null, 9, null);
+ assertEquals(length1 * 2, targetIntVec.getValueCount());
+
+ BigIntVector targetBigIntVec = target.getBigIntVector();
+ targetBigIntVec.allocateNew();
+ ValueVectorDataPopulator.setVector(
+ targetBigIntVec,
+ null, 0L, null, 1L, null, 2L, null, 3L, null, 4L, null, 5L, null, 6L, null, 7L, null, 8L, null, 9L);
+ assertEquals(length1 * 2, targetBigIntVec.getValueCount());
+
+ target.setValueCount(length1 * 2);
+
+ // populate the delta vector
+ delta.setType(0, Types.MinorType.FLOAT4);
+ delta.setType(1, Types.MinorType.FLOAT4);
+ delta.setType(2, Types.MinorType.FLOAT4);
+ delta.setType(3, Types.MinorType.FLOAT4);
+ delta.setType(4, Types.MinorType.FLOAT4);
+
+ Float4Vector deltaFloatVector = delta.getFloat4Vector();
+ deltaFloatVector.allocateNew();
+ ValueVectorDataPopulator.setVector(deltaFloatVector, 10f, 11f, 12f, 13f, 14f);
+ assertEquals(length2, deltaFloatVector.getValueCount());
+ delta.setValueCount(length2);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(length1 * 2 + length2, target.getValueCount());
+
+ for (int i = 0; i < length1; i++) {
+ Object intObj = target.getObject(i * 2);
+ assertTrue(intObj instanceof Integer);
+ assertEquals(i, ((Integer) intObj).intValue());
+
+ Object longObj = target.getObject(i * 2 + 1);
+ assertTrue(longObj instanceof Long);
+ assertEquals(i, ((Long) longObj).longValue());
+ }
+
+ for (int i = 0; i < length2; i++) {
+ Object floatObj = target.getObject(length1 * 2 + i);
+ assertTrue(floatObj instanceof Float);
+ assertEquals(i + length1, ((Float) floatObj).intValue());
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyUnionVector() {
+ final int length1 = 10;
+
+ try (final UnionVector target = UnionVector.empty("target", allocator);
+ final UnionVector delta = UnionVector.empty("delta", allocator)) {
+
+ // alternating ints and big ints
+ target.setType(0, Types.MinorType.INT);
+ target.setType(1, Types.MinorType.BIGINT);
+ target.setType(2, Types.MinorType.INT);
+ target.setType(3, Types.MinorType.BIGINT);
+ target.setType(4, Types.MinorType.INT);
+ target.setType(5, Types.MinorType.BIGINT);
+ target.setType(6, Types.MinorType.INT);
+ target.setType(7, Types.MinorType.BIGINT);
+ target.setType(8, Types.MinorType.INT);
+ target.setType(9, Types.MinorType.BIGINT);
+ target.setType(10, Types.MinorType.INT);
+ target.setType(11, Types.MinorType.BIGINT);
+ target.setType(12, Types.MinorType.INT);
+ target.setType(13, Types.MinorType.BIGINT);
+ target.setType(14, Types.MinorType.INT);
+ target.setType(15, Types.MinorType.BIGINT);
+ target.setType(16, Types.MinorType.INT);
+ target.setType(17, Types.MinorType.BIGINT);
+ target.setType(18, Types.MinorType.INT);
+ target.setType(19, Types.MinorType.BIGINT);
+
+ IntVector targetIntVec = target.getIntVector();
+ ValueVectorDataPopulator.setVector(
+ targetIntVec,
+ 0, null, 1, null, 2, null, 3, null, 4, null, 5, null, 6, null, 7, null, 8, null, 9, null);
+ assertEquals(length1 * 2, targetIntVec.getValueCount());
+
+ BigIntVector targetBigIntVec = target.getBigIntVector();
+ ValueVectorDataPopulator.setVector(
+ targetBigIntVec,
+ null, 0L, null, 1L, null, 2L, null, 3L, null, 4L, null, 5L, null, 6L, null, 7L, null, 8L, null, 9L);
+ assertEquals(length1 * 2, targetBigIntVec.getValueCount());
+
+ target.setValueCount(length1 * 2);
+
+ // initialize the delta vector but leave it empty and unallocated
+ delta.setType(0, Types.MinorType.FLOAT4);
+ delta.setType(1, Types.MinorType.FLOAT4);
+ delta.setType(2, Types.MinorType.FLOAT4);
+ delta.setType(3, Types.MinorType.FLOAT4);
+ delta.setType(4, Types.MinorType.FLOAT4);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(length1 * 2, target.getValueCount());
+
+ for (int i = 0; i < length1; i++) {
+ Object intObj = target.getObject(i * 2);
+ assertTrue(intObj instanceof Integer);
+ assertEquals(i, ((Integer) intObj).intValue());
+
+ Object longObj = target.getObject(i * 2 + 1);
+ assertTrue(longObj instanceof Long);
+ assertEquals(i, ((Long) longObj).longValue());
+ }
+ }
+ }
+
+ private DenseUnionVector getTargetVector() {
+ // create a vector, and populate it with values {1, 2, null, 10L}
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+ final NullableBigIntHolder longHolder = new NullableBigIntHolder();
+ longHolder.isSet = 1;
+ final NullableFloat4Holder floatHolder = new NullableFloat4Holder();
+ floatHolder.isSet = 1;
+ DenseUnionVector targetVector = new DenseUnionVector("target vector", allocator, null, null);
+
+ targetVector.allocateNew();
+
+ while (targetVector.getValueCapacity() < 4) {
+ targetVector.reAlloc();
+ }
+
+ byte intTypeId = targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ targetVector.setTypeId(0, intTypeId);
+ intHolder.value = 1;
+ targetVector.setSafe(0, intHolder);
+ targetVector.setTypeId(1, intTypeId);
+ intHolder.value = 2;
+ targetVector.setSafe(1, intHolder);
+ byte longTypeId = targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ targetVector.setTypeId(3, longTypeId);
+ longHolder.value = 10L;
+ targetVector.setSafe(3, longHolder);
+ targetVector.setValueCount(4);
+
+ assertVectorValuesEqual(targetVector, new Object[]{1, 2, null, 10L});
+ return targetVector;
+ }
+
+ private DenseUnionVector getDeltaVector() {
+ // create a vector, and populate it with values {7, null, 8L, 9.0f}
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+ final NullableBigIntHolder longHolder = new NullableBigIntHolder();
+ longHolder.isSet = 1;
+ final NullableFloat4Holder floatHolder = new NullableFloat4Holder();
+ floatHolder.isSet = 1;
+
+ DenseUnionVector deltaVector = new DenseUnionVector("target vector", allocator, null, null);
+
+ while (deltaVector.getValueCapacity() < 4) {
+ deltaVector.reAlloc();
+ }
+ byte intTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ deltaVector.setTypeId(0, intTypeId);
+ intHolder.value = 7;
+ deltaVector.setSafe(0, intHolder);
+ byte longTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ deltaVector.setTypeId(2, longTypeId);
+ longHolder.value = 8L;
+ deltaVector.setSafe(2, longHolder);
+ byte floatTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ deltaVector.setTypeId(3, floatTypeId);
+ floatHolder.value = 9.0f;
+ deltaVector.setSafe(3, floatHolder);
+
+ deltaVector.setValueCount(4);
+
+ assertVectorValuesEqual(deltaVector, new Object[]{7, null, 8L, 9.0f});
+ return deltaVector;
+ }
+
+ @Test
+ public void testAppendDenseUnionVector() {
+ try (DenseUnionVector targetVector = getTargetVector();
+ DenseUnionVector deltaVector = getDeltaVector()) {
+
+ // append
+ VectorAppender appender = new VectorAppender(targetVector);
+ deltaVector.accept(appender, null);
+ assertVectorValuesEqual(targetVector, new Object[] {1, 2, null, 10L, 7, null, 8L, 9.0f});
+ }
+
+ // test reverse append
+ try (DenseUnionVector targetVector = getTargetVector();
+ DenseUnionVector deltaVector = getDeltaVector()) {
+
+ // append
+ VectorAppender appender = new VectorAppender(deltaVector);
+ targetVector.accept(appender, null);
+ assertVectorValuesEqual(deltaVector, new Object[] {7, null, 8L, 9.0f, 1, 2, null, 10L});
+ }
+ }
+
+ private DenseUnionVector getEmptyDeltaVector() {
+ // create a vector, but leave it empty and uninitialized
+ DenseUnionVector deltaVector = new DenseUnionVector("target vector", allocator, null, null);
+
+ byte intTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ deltaVector.setTypeId(0, intTypeId);
+
+ byte longTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ deltaVector.setTypeId(2, longTypeId);
+
+ byte floatTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ deltaVector.setTypeId(3, floatTypeId);
+
+ return deltaVector;
+ }
+
+ @Test
+ public void testAppendEmptyDenseUnionVector() {
+ try (DenseUnionVector targetVector = getTargetVector();
+ DenseUnionVector deltaVector = getEmptyDeltaVector()) {
+
+ // append
+ VectorAppender appender = new VectorAppender(targetVector);
+ deltaVector.accept(appender, null);
+ assertVectorValuesEqual(targetVector, new Object[] {1, 2, null, 10L});
+ }
+ }
+
+ /**
+ * Test appending dense union vectors where the child vectors do not match.
+ */
+ @Test
+ public void testAppendDenseUnionVectorMismatch() {
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+
+ final NullableBigIntHolder longHolder = new NullableBigIntHolder();
+ longHolder.isSet = 1;
+
+ final NullableFloat4Holder floatHolder = new NullableFloat4Holder();
+ floatHolder.isSet = 1;
+
+ try (DenseUnionVector targetVector = new DenseUnionVector("target vector" , allocator, null, null);
+ DenseUnionVector deltaVector = new DenseUnionVector("target vector" , allocator, null, null)) {
+ targetVector.allocateNew();
+ deltaVector.allocateNew();
+
+ // populate the target vector with values {1, 2L}
+ while (targetVector.getValueCapacity() < 2) {
+ targetVector.reAlloc();
+ }
+ byte intTypeId = targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ targetVector.setTypeId(0, intTypeId);
+ intHolder.value = 1;
+ targetVector.setSafe(0, intHolder);
+ byte longTypeId = targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ targetVector.setTypeId(1, longTypeId);
+ longHolder.value = 2L;
+ targetVector.setSafe(1, longHolder);
+ targetVector.setValueCount(2);
+
+ assertVectorValuesEqual(targetVector, new Object[] {1, 2L});
+
+ // populate the delta vector with values {3, 5.0f}
+ while (deltaVector.getValueCapacity() < 2) {
+ deltaVector.reAlloc();
+ }
+ intTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ deltaVector.setTypeId(0, intTypeId);
+ intHolder.value = 3;
+ deltaVector.setSafe(0, intHolder);
+ byte floatTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ deltaVector.setTypeId(1, floatTypeId);
+ floatHolder.value = 5.0f;
+ deltaVector.setSafe(1, floatHolder);
+ deltaVector.setValueCount(2);
+
+ assertVectorValuesEqual(deltaVector, new Object[] {3, 5.0f});
+
+ // append
+ VectorAppender appender = new VectorAppender(targetVector);
+ assertThrows(IllegalArgumentException.class,
+ () -> deltaVector.accept(appender, null));
+ }
+ }
+
+ @Test
+ public void testAppendVectorNegative() {
+ final int vectorLength = 10;
+ try (IntVector target = new IntVector("", allocator);
+ VarCharVector delta = new VarCharVector("", allocator)) {
+
+ target.allocateNew(vectorLength);
+ delta.allocateNew(vectorLength);
+
+ VectorAppender appender = new VectorAppender(target);
+
+ assertThrows(IllegalArgumentException.class,
+ () -> delta.accept(appender, null));
+ }
+ }
+
+ private void assertVectorValuesEqual(ValueVector vector, Object[] values) {
+ assertEquals(vector.getValueCount(), values.length);
+ for (int i = 0; i < values.length; i++) {
+ assertEquals(vector.getObject(i), values[i]);
+ }
+ }
+
+ public static void assertVectorsEqual(ValueVector vector1, ValueVector vector2) {
+ assertEquals(vector1.getValueCount(), vector2.getValueCount());
+
+ TypeEqualsVisitor typeEqualsVisitor = new TypeEqualsVisitor(vector1, false, false);
+ RangeEqualsVisitor equalsVisitor =
+ new RangeEqualsVisitor(vector1, vector2, (v1, v2) -> typeEqualsVisitor.equals(vector2));
+ assertTrue(equalsVisitor.rangeEquals(new Range(0, 0, vector1.getValueCount())));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java
new file mode 100644
index 000000000..799c25c0a
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static junit.framework.TestCase.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link VectorBatchAppender}.
+ */
+public class TestVectorBatchAppender {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testBatchAppendIntVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+ final int length3 = 7;
+ try (IntVector target = new IntVector("", allocator);
+ IntVector delta1 = new IntVector("", allocator);
+ IntVector delta2 = new IntVector("", allocator)) {
+
+ target.allocateNew(length1);
+ delta1.allocateNew(length2);
+ delta2.allocateNew(length3);
+
+ ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+ ValueVectorDataPopulator.setVector(delta1, 10, 11, 12, 13, 14);
+ ValueVectorDataPopulator.setVector(delta2, 15, 16, 17, 18, 19, 20, 21);
+
+ VectorBatchAppender.batchAppend(target, delta1, delta2);
+
+ assertEquals(length1 + length2 + length3, target.getValueCount());
+ for (int i = 0; i < target.getValueCount(); i++) {
+ assertEquals(i, target.get(i));
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java
new file mode 100644
index 000000000..ab0ee3a20
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static junit.framework.TestCase.assertEquals;
+import static org.apache.arrow.vector.util.TestVectorAppender.assertVectorsEqual;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link VectorSchemaRootAppender}.
+ */
+public class TestVectorSchemaRootAppender {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testVectorScehmaRootAppend() {
+ final int length1 = 5;
+ final int length2 = 3;
+ final int length3 = 2;
+
+ try (IntVector targetChild1 = new IntVector("t1", allocator);
+ VarCharVector targetChild2 = new VarCharVector("t2", allocator);
+ BigIntVector targetChild3 = new BigIntVector("t3", allocator);
+
+ IntVector deltaChildOne1 = new IntVector("do1", allocator);
+ VarCharVector deltaChildOne2 = new VarCharVector("do2", allocator);
+ BigIntVector deltaChildOne3 = new BigIntVector("do3", allocator);
+
+ IntVector deltaChildTwo1 = new IntVector("dt1", allocator);
+ VarCharVector deltaChildTwo2 = new VarCharVector("dt2", allocator);
+ BigIntVector deltaChildTwo3 = new BigIntVector("dt3", allocator)) {
+
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, null, 3, 4);
+ ValueVectorDataPopulator.setVector(targetChild2, "zero", "one", null, "three", "four");
+ ValueVectorDataPopulator.setVector(targetChild3, 0L, 10L, null, 30L, 40L);
+ VectorSchemaRoot root1 = VectorSchemaRoot.of(targetChild1, targetChild2, targetChild3);
+ root1.setRowCount(length1);
+
+ ValueVectorDataPopulator.setVector(deltaChildOne1, 5, 6, 7);
+ ValueVectorDataPopulator.setVector(deltaChildOne2, "five", "six", "seven");
+ ValueVectorDataPopulator.setVector(deltaChildOne3, 50L, 60L, 70L);
+ VectorSchemaRoot root2 = VectorSchemaRoot.of(deltaChildOne1, deltaChildOne2, deltaChildOne3);
+ root2.setRowCount(length2);
+
+ ValueVectorDataPopulator.setVector(deltaChildTwo1, null, 9);
+ ValueVectorDataPopulator.setVector(deltaChildTwo2, null, "nine");
+ ValueVectorDataPopulator.setVector(deltaChildTwo3, null, 90L);
+ VectorSchemaRoot root3 = VectorSchemaRoot.of(deltaChildTwo1, deltaChildTwo2, deltaChildTwo3);
+ root3.setRowCount(length3);
+
+ VectorSchemaRootAppender.append(root1, root2, root3);
+ assertEquals(length1 + length2 + length3, root1.getRowCount());
+ assertEquals(3, root1.getFieldVectors().size());
+
+ try (IntVector expected1 = new IntVector("", allocator);
+ VarCharVector expected2 = new VarCharVector("", allocator);
+ BigIntVector expected3 = new BigIntVector("", allocator)) {
+
+ ValueVectorDataPopulator.setVector(expected1, 0, 1, null, 3, 4, 5, 6, 7, null, 9);
+ ValueVectorDataPopulator.setVector(
+ expected2, "zero", "one", null, "three", "four", "five", "six", "seven", null, "nine");
+ ValueVectorDataPopulator.setVector(expected3, 0L, 10L, null, 30L, 40L, 50L, 60L, 70L, null, 90L);
+
+ assertVectorsEqual(expected1, root1.getVector(0));
+ assertVectorsEqual(expected2, root1.getVector(1));
+ assertVectorsEqual(expected3, root1.getVector(2));
+ }
+ }
+ }
+
+ @Test
+ public void testRootWithDifferentChildCounts() {
+ try (IntVector targetChild1 = new IntVector("t1", allocator);
+ VarCharVector targetChild2 = new VarCharVector("t2", allocator);
+ BigIntVector targetChild3 = new BigIntVector("t3", allocator);
+
+ IntVector deltaChild1 = new IntVector("d1", allocator);
+ VarCharVector deltaChild2 = new VarCharVector("d2", allocator)) {
+
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, null, 3, 4);
+ ValueVectorDataPopulator.setVector(targetChild2, "zero", "one", null, "three", "four");
+ ValueVectorDataPopulator.setVector(targetChild3, 0L, 10L, null, 30L, 40L);
+ VectorSchemaRoot root1 = VectorSchemaRoot.of(targetChild1, targetChild2, targetChild3);
+ root1.setRowCount(5);
+
+ ValueVectorDataPopulator.setVector(deltaChild1, 5, 6, 7);
+ ValueVectorDataPopulator.setVector(deltaChild2, "five", "six", "seven");
+ VectorSchemaRoot root2 = VectorSchemaRoot.of(deltaChild1, deltaChild2);
+ root2.setRowCount(3);
+
+ IllegalArgumentException exp = assertThrows(IllegalArgumentException.class,
+ () -> VectorSchemaRootAppender.append(root1, root2));
+
+ assertEquals("Vector schema roots have different numbers of child vectors.", exp.getMessage());
+ }
+ }
+
+ @Test
+ public void testRootWithDifferentChildTypes() {
+ try (IntVector targetChild1 = new IntVector("t1", allocator);
+ VarCharVector targetChild2 = new VarCharVector("t2", allocator);
+
+ IntVector deltaChild1 = new IntVector("d1", allocator);
+ VarCharVector deltaChild2 = new VarCharVector("d2", allocator)) {
+
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, null, 3, 4);
+ ValueVectorDataPopulator.setVector(targetChild2, "zero", "one", null, "three", "four");
+ VectorSchemaRoot root1 = VectorSchemaRoot.of(targetChild1, targetChild2);
+ root1.setRowCount(5);
+
+ ValueVectorDataPopulator.setVector(deltaChild1, 5, 6, 7);
+ ValueVectorDataPopulator.setVector(deltaChild2, "five", "six", "seven");
+
+ // note that the child vectors are in reverse order
+ VectorSchemaRoot root2 = VectorSchemaRoot.of(deltaChild2, deltaChild1);
+ root2.setRowCount(3);
+
+ IllegalArgumentException exp = assertThrows(IllegalArgumentException.class,
+ () -> VectorSchemaRootAppender.append(root1, root2));
+
+ assertEquals("Vector schema roots have different schemas.", exp.getMessage());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java
new file mode 100644
index 000000000..2354b281e
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java
@@ -0,0 +1,260 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.apache.arrow.vector.util.ValueVectorUtility.validate;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValidateVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ private static final Charset utf8Charset = Charset.forName("UTF-8");
+ private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
+ private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testBaseFixedWidthVector() {
+ try (final IntVector vector = new IntVector("v", allocator)) {
+ validate(vector);
+ setVector(vector, 1, 2, 3);
+ validate(vector);
+
+ vector.getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Not enough capacity for fixed width data buffer"));
+ }
+ }
+
+ @Test
+ public void testBaseVariableWidthVector() {
+ try (final VarCharVector vector = new VarCharVector("v", allocator)) {
+ validate(vector);
+ setVector(vector, STR1, STR2, STR3);
+ validate(vector);
+
+ vector.getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Not enough capacity for data buffer"));
+ }
+ }
+
+ @Test
+ public void testBaseLargeVariableWidthVector() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) {
+ validate(vector);
+ setVector(vector, STR1, STR2, null, STR3);
+ validate(vector);
+
+ vector.getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Not enough capacity for data buffer"));
+ }
+ }
+
+ @Test
+ public void testListVector() {
+ try (final ListVector vector = ListVector.empty("v", allocator)) {
+ validate(vector);
+ setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5));
+ validate(vector);
+
+ vector.getDataVector().setValueCount(3);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Inner vector does not contain enough elements."));
+ }
+ }
+
+ @Test
+ public void testLargeListVector() {
+ try (final LargeListVector vector = LargeListVector.empty("v", allocator)) {
+ validate(vector);
+ setVector(vector, Arrays.asList(1, 2, 3, 4), Arrays.asList(5, 6));
+ validate(vector);
+
+ vector.getDataVector().setValueCount(4);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Inner vector does not contain enough elements."));
+ }
+ }
+
+ @Test
+ public void testFixedSizeListVector() {
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("v", 3, allocator)) {
+ validate(vector);
+ setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6));
+ validate(vector);
+
+ vector.getDataVector().setValueCount(3);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Inner vector does not contain enough elements."));
+ }
+ }
+
+ @Test
+ public void testStructVectorRangeEquals() {
+ try (final StructVector vector = StructVector.empty("struct", allocator)) {
+ vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ validate(vector);
+
+ NullableStructWriter writer = vector.getWriter();
+ writer.allocate();
+
+ writeStructVector(writer, 1, 10L);
+ writeStructVector(writer, 2, 20L);
+ writeStructVector(writer, 3, 30L);
+ writeStructVector(writer, 4, 40L);
+ writeStructVector(writer, 5, 50L);
+ writer.setValueCount(5);
+
+ vector.getChild("f0").setValueCount(2);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Struct vector length not equal to child vector length"));
+
+ vector.getChild("f0").setValueCount(5);
+ validate(vector);
+
+ vector.getChild("f0").getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e2 = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e2.getMessage().contains("Not enough capacity for fixed width data buffer"));
+ }
+ }
+
+ @Test
+ public void testUnionVector() {
+ try (final UnionVector vector = UnionVector.empty("union", allocator)) {
+ validate(vector);
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ vector.setType(0, Types.MinorType.FLOAT4);
+ vector.setSafe(0, float4Holder);
+ vector.setType(1, Types.MinorType.FLOAT8);
+ vector.setSafe(1, float8Holder);
+ vector.setValueCount(2);
+
+ validate(vector);
+
+ vector.getChildrenFromFields().get(0).setValueCount(1);
+ ValidateUtil.ValidateException e1 = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e1.getMessage().contains("Union vector length not equal to child vector length"));
+
+ vector.getChildrenFromFields().get(0).setValueCount(2);
+ validate(vector);
+
+ vector.getChildrenFromFields().get(0).getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e2 = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e2.getMessage().contains("Not enough capacity for fixed width data buffer"));
+ }
+ }
+
+ @Test
+ public void testDenseUnionVector() {
+ try (final DenseUnionVector vector = DenseUnionVector.empty("union", allocator)) {
+ validate(vector);
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ byte float4TypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ byte float8TypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT8.getType()));
+
+ vector.setTypeId(0, float4TypeId);
+ vector.setSafe(0, float4Holder);
+ vector.setTypeId(1, float8TypeId);
+ vector.setSafe(1, float8Holder);
+ vector.setValueCount(2);
+
+ validate(vector);
+
+ vector.getChildrenFromFields().get(0).getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Not enough capacity for fixed width data buffer"));
+ }
+ }
+
+ private void writeStructVector(NullableStructWriter writer, int value1, long value2) {
+ writer.start();
+ writer.integer("f0").writeInt(value1);
+ writer.bigInt("f1").writeBigInt(value2);
+ writer.end();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java
new file mode 100644
index 000000000..4241a0d9c
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.Arrays;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValidateVectorFull {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testBaseVariableWidthVector() {
+ try (final VarCharVector vector = new VarCharVector("v", allocator)) {
+ validateFull(vector);
+ setVector(vector, "aaa", "bbb", "ccc");
+ validateFull(vector);
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ offsetBuf.setInt(0, 100);
+ offsetBuf.setInt(4, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testBaseLargeVariableWidthVector() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) {
+ validateFull(vector);
+ setVector(vector, "aaa", "bbb", null, "ccc");
+ validateFull(vector);
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ offsetBuf.setLong(0, 100);
+ offsetBuf.setLong(8, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the large offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testListVector() {
+ try (final ListVector vector = ListVector.empty("v", allocator)) {
+ validateFull(vector);
+ setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5), Arrays.asList(6, 7, 8, 9));
+ validateFull(vector);
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ offsetBuf.setInt(0, 100);
+ offsetBuf.setInt(8, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testLargeListVector() {
+ try (final LargeListVector vector = LargeListVector.empty("v", allocator)) {
+ validateFull(vector);
+ setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5), Arrays.asList(6, 7, 8, 9));
+ validateFull(vector);
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ offsetBuf.setLong(0, 100);
+ offsetBuf.setLong(16, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the large offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testStructVectorRangeEquals() {
+ try (final StructVector vector = StructVector.empty("struct", allocator)) {
+ IntVector intVector =
+ vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ VarCharVector strVector =
+ vector.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+
+ validateFull(vector);
+ validateFull(intVector);
+ validateFull(strVector);
+
+ ValueVectorDataPopulator.setVector(intVector, 1, 2, 3, 4, 5);
+ ValueVectorDataPopulator.setVector(strVector, "a", "b", "c", "d", "e");
+ vector.setValueCount(5);
+
+ validateFull(vector);
+ validateFull(intVector);
+ validateFull(strVector);
+
+ ArrowBuf offsetBuf = strVector.getOffsetBuffer();
+ offsetBuf.setInt(0, 100);
+ offsetBuf.setInt(8, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(strVector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+
+ e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testUnionVector() {
+ try (final UnionVector vector = UnionVector.empty("union", allocator)) {
+ validateFull(vector);
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ vector.setType(0, Types.MinorType.FLOAT4);
+ vector.setSafe(0, float4Holder);
+ vector.setType(1, Types.MinorType.FLOAT8);
+ vector.setSafe(1, float8Holder);
+ vector.setValueCount(2);
+
+ validateFull(vector);
+
+ // negative type id
+ vector.getTypeBuffer().setByte(0, -1);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The type id at position 0 is negative"));
+ }
+ }
+
+ @Test
+ public void testDenseUnionVector() {
+ try (final DenseUnionVector vector = DenseUnionVector.empty("union", allocator)) {
+ validateFull(vector);
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ byte float4TypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ byte float8TypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT8.getType()));
+
+ vector.setTypeId(0, float4TypeId);
+ vector.setSafe(0, float4Holder);
+ vector.setTypeId(1, float8TypeId);
+ vector.setSafe(1, float8Holder);
+ vector.setValueCount(2);
+
+ validateFull(vector);
+
+ ValueVector subVector = vector.getVectorByType(float4TypeId);
+ assertTrue(subVector instanceof Float4Vector);
+ assertEquals(1, subVector.getValueCount());
+
+ // shrink sub-vector
+ subVector.setValueCount(0);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("Dense union vector offset exceeds sub-vector boundary"));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java
new file mode 100644
index 000000000..1885fb21f
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.util.ValueVectorUtility.validate;
+import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValidateVectorSchemaRoot {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testValidatePositive() {
+ try (IntVector intVector = new IntVector("int vector", allocator);
+ VarCharVector strVector = new VarCharVector("var char vector", allocator)) {
+
+ VectorSchemaRoot root = VectorSchemaRoot.of(intVector, strVector);
+
+ validate(root);
+ validateFull(root);
+
+ ValueVectorDataPopulator.setVector(intVector, 1, 2, 3, 4, 5);
+ ValueVectorDataPopulator.setVector(strVector, "a", "b", "c", "d", "e");
+ root.setRowCount(5);
+
+ validate(root);
+ validateFull(root);
+ }
+ }
+
+ @Test
+ public void testValidateNegative() {
+ try (IntVector intVector = new IntVector("int vector", allocator);
+ VarCharVector strVector = new VarCharVector("var char vector", allocator)) {
+
+ VectorSchemaRoot root = VectorSchemaRoot.of(intVector, strVector);
+
+ ValueVectorDataPopulator.setVector(intVector, 1, 2, 3, 4, 5);
+ ValueVectorDataPopulator.setVector(strVector, "a", "b", "c", "d", "e");
+
+ // validate mismatching value counts
+ root.setRowCount(4);
+ intVector.setValueCount(5);
+ strVector.setValueCount(5);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(root));
+ assertTrue(e.getMessage().contains("Child vector and vector schema root have different value counts"));
+ e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(root));
+ assertTrue(e.getMessage().contains("Child vector and vector schema root have different value counts"));
+
+ // valid problems with the child vector
+ root.setRowCount(5);
+ ArrowBuf offsetBuf = strVector.getOffsetBuffer();
+ offsetBuf.setInt(0, 100);
+ offsetBuf.setInt(8, 50);
+ validate(root);
+ e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(root));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java
new file mode 100644
index 000000000..7a0f12f7a
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.function.Supplier;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link ValidateVectorTypeVisitor}.
+ */
+public class TestValidateVectorTypeVisitor {
+
+ private BufferAllocator allocator;
+
+ private ValidateVectorTypeVisitor visitor = new ValidateVectorTypeVisitor();
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ private void testPositiveCase(Supplier<ValueVector> vectorGenerator) {
+ try (ValueVector vector = vectorGenerator.get();) {
+ vector.accept(visitor, null);
+ }
+ }
+
+ private void testNegativeCase(Supplier<ValueVector> vectorGenerator) {
+ try (ValueVector vector = vectorGenerator.get()) {
+ assertThrows(ValidateUtil.ValidateException.class, () -> {
+ vector.accept(visitor, null);
+ });
+ }
+ }
+
+ @Test
+ public void testFixedWidthVectorsPositive() {
+ // integer vectors
+ testPositiveCase(() -> new TinyIntVector("vector", allocator));
+ testPositiveCase(() -> new SmallIntVector("vector", allocator));
+ testPositiveCase(() -> new IntVector("vector", allocator));
+ testPositiveCase(() -> new BigIntVector("vector", allocator));
+ testPositiveCase(() -> new UInt1Vector("vector", allocator));
+ testPositiveCase(() -> new UInt2Vector("vector", allocator));
+ testPositiveCase(() -> new UInt4Vector("vector", allocator));
+ testPositiveCase(() -> new UInt8Vector("vector", allocator));
+
+ testPositiveCase(() -> new BitVector("vector", allocator));
+ testPositiveCase(() -> new DecimalVector("vector", allocator, 30, 16));
+
+ // date vectors
+ testPositiveCase(() -> new DateDayVector("vector", allocator));
+ testPositiveCase(() -> new DateMilliVector("vector", allocator));
+
+ testPositiveCase(() -> new DurationVector(
+ "vector", FieldType.nullable(new ArrowType.Duration(TimeUnit.SECOND)), allocator));
+
+ // float vectors
+ testPositiveCase(() -> new Float4Vector("vector", allocator));
+ testPositiveCase(() -> new Float8Vector("vector", allocator));
+
+ // interval vectors
+ testPositiveCase(() -> new IntervalDayVector("vector", allocator));
+ testPositiveCase(() -> new IntervalYearVector("vector", allocator));
+
+ // time vectors
+ testPositiveCase(() -> new TimeMicroVector("vector", allocator));
+ testPositiveCase(() -> new TimeMilliVector("vector", allocator));
+ testPositiveCase(() -> new TimeMicroVector("vector", allocator));
+ testPositiveCase(() -> new TimeSecVector("vector", allocator));
+
+ // time stamp vectors
+ testPositiveCase(() -> new TimeStampMicroTZVector("vector", allocator, "cn"));
+ testPositiveCase(() -> new TimeStampMicroVector("vector", allocator));
+ testPositiveCase(() -> new TimeStampMilliTZVector("vector", allocator, "cn"));
+ testPositiveCase(() -> new TimeStampMilliVector("vector", allocator));
+ testPositiveCase(() -> new TimeStampNanoTZVector("vector", allocator, "cn"));
+ testPositiveCase(() -> new TimeStampNanoVector("vector", allocator));
+ testPositiveCase(() -> new TimeStampSecTZVector("vector", allocator, "cn"));
+ testPositiveCase(() -> new TimeStampSecVector("vector", allocator));
+
+ testPositiveCase(() -> new FixedSizeBinaryVector("vector", allocator, 5));
+ }
+
+ @Test
+ public void testFixedWidthVectorsNegative() {
+ // integer vectors
+ testNegativeCase(
+ () -> new TinyIntVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new SmallIntVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new BigIntVector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new BigIntVector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new UInt1Vector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new UInt2Vector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new UInt4Vector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new UInt8Vector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+
+ testNegativeCase(
+ () -> new BitVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new DecimalVector("vector", allocator, 30, -16));
+
+ // date vectors
+ testNegativeCase(
+ () -> new DateDayVector("vector", FieldType.nullable(Types.MinorType.FLOAT4.getType()), allocator));
+ testNegativeCase(
+ () -> new DateMilliVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+
+ // float pont vectors
+ testNegativeCase(
+ () -> new Float4Vector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new Float8Vector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+
+ // interval vectors
+ testNegativeCase(
+ () -> new IntervalDayVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new IntervalYearVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+
+ // time vectors
+ testNegativeCase(
+ () -> new TimeMilliVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeMicroVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeNanoVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeSecVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+
+ // time stamp vectors
+ testNegativeCase(
+ () -> new TimeStampMicroTZVector("vector", allocator, null));
+ testNegativeCase(
+ () -> new TimeStampMicroVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeStampMilliTZVector("vector", allocator, null));
+ testNegativeCase(
+ () -> new TimeStampMilliVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeStampNanoTZVector("vector", allocator, null));
+ testNegativeCase(
+ () -> new TimeStampNanoVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeStampSecTZVector("vector", allocator, null));
+ testNegativeCase(
+ () -> new TimeStampSecVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ }
+
+ @Test
+ public void testVariableWidthVectorsPositive() {
+ testPositiveCase(() -> new VarCharVector("vector", allocator));
+ testPositiveCase(() -> new VarBinaryVector("vector", allocator));
+ }
+
+ @Test
+ public void testVariableWidthVectorsNegative() {
+ testNegativeCase(
+ () -> new VarCharVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new VarBinaryVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ }
+
+ @Test
+ public void testLargeVariableWidthVectorsPositive() {
+ testPositiveCase(() -> new LargeVarCharVector("vector", allocator));
+ testPositiveCase(() -> new LargeVarBinaryVector("vector", allocator));
+ }
+
+ @Test
+ public void testLargeVariableWidthVectorsNegative() {
+ testNegativeCase(
+ () -> new LargeVarCharVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new LargeVarBinaryVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ }
+
+ @Test
+ public void testListVector() {
+ testPositiveCase(() -> ListVector.empty("vector", allocator));
+
+ testNegativeCase(
+ () -> new ListVector("vector", allocator, FieldType.nullable(Types.MinorType.INT.getType()), null));
+ }
+
+ @Test
+ public void testLargeListVector() {
+ testPositiveCase(() -> LargeListVector.empty("vector", allocator));
+
+ testNegativeCase(
+ () -> new LargeListVector("vector", allocator, FieldType.nullable(Types.MinorType.INT.getType()), null));
+ }
+
+ @Test
+ public void testFixedSizeListVector() {
+ testPositiveCase(() -> FixedSizeListVector.empty("vector", 10, allocator));
+ }
+
+ @Test
+ public void testStructVector() {
+ testPositiveCase(() -> StructVector.empty("vector", allocator));
+
+ testNegativeCase(
+ () -> new StructVector("vector", allocator, FieldType.nullable(Types.MinorType.INT.getType()), null));
+ }
+
+ @Test
+ public void testUnionVector() {
+ testPositiveCase(() -> UnionVector.empty("vector", allocator));
+ }
+
+ @Test
+ public void testDenseUnionVector() {
+ testPositiveCase(() -> DenseUnionVector.empty("vector", allocator));
+ }
+
+ @Test
+ public void testNullVector() {
+ testPositiveCase(() -> new NullVector("null vec"));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/resources/logback.xml b/src/arrow/java/vector/src/test/resources/logback.xml
new file mode 100644
index 000000000..f9e449fa6
--- /dev/null
+++ b/src/arrow/java/vector/src/test/resources/logback.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<configuration>
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <!-- encoders are assigned the type
+ ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+ </encoder>
+ </appender>
+
+ <statusListener class="ch.qos.logback.core.status.NopStatusListener"/>
+ <logger name="org.apache.arrow" additivity="false">
+ <level value="info" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+</configuration>